From 182683814b1846a7de57857801e802198ddc545c Mon Sep 17 00:00:00 2001 From: IanShaw027 <131567472+IanShaw027@users.noreply.github.com> Date: Wed, 14 Jan 2026 10:52:56 +0800 Subject: [PATCH] =?UTF-8?q?refactor(ops):=20=E7=A7=BB=E9=99=A4duration?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E5=91=8A=E8=AD=A6=E6=8C=87=E6=A0=87=EF=BC=8C?= =?UTF-8?q?=E7=AE=80=E5=8C=96=E7=9B=91=E6=8E=A7=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 主要改动: - 移除 p95_latency_ms 和 p99_latency_ms 告警指标类型 - 移除配置中的 latency_p99_ms_max 阈值设置 - 简化健康分数计算(移除latency权重,重新归一化SLA和错误率) - 移除duration相关的诊断规则和阈值检查 - 统一术语:延迟 → 请求时长 - 保留duration数据展示,但不再用于告警判断 - 聚焦TTFT作为主要的响应速度告警指标 影响范围: - Backend: handler, service, models, tests - Frontend: API types, i18n, components --- .../handler/admin/ops_alerts_handler.go | 2 - .../service/ops_alert_evaluator_service.go | 10 --- backend/internal/service/ops_health_score.go | 21 ++---- .../internal/service/ops_health_score_test.go | 11 --- backend/internal/service/ops_settings.go | 5 -- .../internal/service/ops_settings_models.go | 1 - frontend/src/api/admin/ops.ts | 3 - frontend/src/i18n/locales/en.ts | 27 +++---- frontend/src/i18n/locales/zh.ts | 37 ++++------ .../ops/components/OpsAlertRulesCard.vue | 18 ----- .../ops/components/OpsDashboardHeader.vue | 71 ++++++------------- .../ops/components/OpsErrorDetailsModal.vue | 66 +++++++++-------- .../ops/components/OpsRuntimeSettingsCard.vue | 19 +---- .../ops/components/OpsSettingsDialog.vue | 28 ++------ 14 files changed, 92 insertions(+), 227 deletions(-) diff --git a/backend/internal/handler/admin/ops_alerts_handler.go b/backend/internal/handler/admin/ops_alerts_handler.go index 8dce68c8..c9da19c7 100644 --- a/backend/internal/handler/admin/ops_alerts_handler.go +++ b/backend/internal/handler/admin/ops_alerts_handler.go @@ -20,8 +20,6 @@ var validOpsAlertMetricTypes = []string{ "success_rate", "error_rate", "upstream_error_rate", - "p95_latency_ms", - "p99_latency_ms", "cpu_usage_percent", "memory_usage_percent", "concurrency_queue_depth", diff --git a/backend/internal/service/ops_alert_evaluator_service.go b/backend/internal/service/ops_alert_evaluator_service.go index a0c93772..2b619f4d 100644 --- a/backend/internal/service/ops_alert_evaluator_service.go +++ b/backend/internal/service/ops_alert_evaluator_service.go @@ -523,16 +523,6 @@ func (s *OpsAlertEvaluatorService) computeRuleMetric( return 0, false } return overview.UpstreamErrorRate * 100, true - case "p95_latency_ms": - if overview.Duration.P95 == nil { - return 0, false - } - return float64(*overview.Duration.P95), true - case "p99_latency_ms": - if overview.Duration.P99 == nil { - return 0, false - } - return float64(*overview.Duration.P99), true default: return 0, false } diff --git a/backend/internal/service/ops_health_score.go b/backend/internal/service/ops_health_score.go index feb0d843..ea648b8c 100644 --- a/backend/internal/service/ops_health_score.go +++ b/backend/internal/service/ops_health_score.go @@ -32,7 +32,7 @@ func computeDashboardHealthScore(now time.Time, overview *OpsDashboardOverview) } // computeBusinessHealth calculates business health score (0-100) -// Components: SLA (50%) + Error Rate (30%) + Latency (20%) +// Components: SLA (50%) + Error Rate (30%) func computeBusinessHealth(overview *OpsDashboardOverview) float64 { // SLA score: 99.5% → 100, 95% → 0 (linear) slaScore := 100.0 @@ -59,22 +59,9 @@ func computeBusinessHealth(overview *OpsDashboardOverview) float64 { } } - // Latency score: 1s → 100, 10s → 0 (linear) - // Uses P99 of duration (TTFT is less critical for overall health) - latencyScore := 100.0 - if overview.Duration.P99 != nil { - p99 := float64(*overview.Duration.P99) - if p99 > 1000 { - if p99 <= 10000 { - latencyScore = (10000 - p99) / 9000 * 100 - } else { - latencyScore = 0 - } - } - } - - // Weighted combination - return slaScore*0.5 + errorScore*0.3 + latencyScore*0.2 + // Weighted combination (renormalized after removing duration) + const weightSum = 0.8 + return (slaScore*0.5 + errorScore*0.3) / weightSum } // computeInfraHealth calculates infrastructure health score (0-100) diff --git a/backend/internal/service/ops_health_score_test.go b/backend/internal/service/ops_health_score_test.go index 849ba146..859bdd2e 100644 --- a/backend/internal/service/ops_health_score_test.go +++ b/backend/internal/service/ops_health_score_test.go @@ -291,17 +291,6 @@ func TestComputeBusinessHealth(t *testing.T) { wantMin: 95, wantMax: 100, }, - { - name: "latency boundary 1000ms", - overview: &OpsDashboardOverview{ - SLA: 0.995, - ErrorRate: 0, - UpstreamErrorRate: 0, - Duration: OpsPercentiles{P99: intPtr(1000)}, - }, - wantMin: 95, - wantMax: 100, - }, { name: "upstream error dominates", overview: &OpsDashboardOverview{ diff --git a/backend/internal/service/ops_settings.go b/backend/internal/service/ops_settings.go index 53c78fed..6c2f6551 100644 --- a/backend/internal/service/ops_settings.go +++ b/backend/internal/service/ops_settings.go @@ -482,13 +482,11 @@ const SettingKeyOpsMetricThresholds = "ops_metric_thresholds" func defaultOpsMetricThresholds() *OpsMetricThresholds { slaMin := 99.5 - latencyMax := 2000.0 ttftMax := 500.0 reqErrMax := 5.0 upstreamErrMax := 5.0 return &OpsMetricThresholds{ SLAPercentMin: &slaMin, - LatencyP99MsMax: &latencyMax, TTFTp99MsMax: &ttftMax, RequestErrorRatePercentMax: &reqErrMax, UpstreamErrorRatePercentMax: &upstreamErrMax, @@ -538,9 +536,6 @@ func (s *OpsService) UpdateMetricThresholds(ctx context.Context, cfg *OpsMetricT if cfg.SLAPercentMin != nil && (*cfg.SLAPercentMin < 0 || *cfg.SLAPercentMin > 100) { return nil, errors.New("sla_percent_min must be between 0 and 100") } - if cfg.LatencyP99MsMax != nil && *cfg.LatencyP99MsMax < 0 { - return nil, errors.New("latency_p99_ms_max must be >= 0") - } if cfg.TTFTp99MsMax != nil && *cfg.TTFTp99MsMax < 0 { return nil, errors.New("ttft_p99_ms_max must be >= 0") } diff --git a/backend/internal/service/ops_settings_models.go b/backend/internal/service/ops_settings_models.go index 229488a1..9ff83ccb 100644 --- a/backend/internal/service/ops_settings_models.go +++ b/backend/internal/service/ops_settings_models.go @@ -63,7 +63,6 @@ type OpsAlertSilencingSettings struct { type OpsMetricThresholds struct { SLAPercentMin *float64 `json:"sla_percent_min,omitempty"` // SLA低于此值变红 - LatencyP99MsMax *float64 `json:"latency_p99_ms_max,omitempty"` // 延迟P99高于此值变红 TTFTp99MsMax *float64 `json:"ttft_p99_ms_max,omitempty"` // TTFT P99高于此值变红 RequestErrorRatePercentMax *float64 `json:"request_error_rate_percent_max,omitempty"` // 请求错误率高于此值变红 UpstreamErrorRatePercentMax *float64 `json:"upstream_error_rate_percent_max,omitempty"` // 上游错误率高于此值变红 diff --git a/frontend/src/api/admin/ops.ts b/frontend/src/api/admin/ops.ts index 24ac7ad3..dfe4f2ca 100644 --- a/frontend/src/api/admin/ops.ts +++ b/frontend/src/api/admin/ops.ts @@ -653,8 +653,6 @@ export type MetricType = | 'success_rate' | 'error_rate' | 'upstream_error_rate' - | 'p95_latency_ms' - | 'p99_latency_ms' | 'cpu_usage_percent' | 'memory_usage_percent' | 'concurrency_queue_depth' @@ -729,7 +727,6 @@ export interface EmailNotificationConfig { export interface OpsMetricThresholds { sla_percent_min?: number | null // SLA低于此值变红 - latency_p99_ms_max?: number | null // 延迟 P99 高于此值变红 ttft_p99_ms_max?: number | null // TTFT P99高于此值变红 request_error_rate_percent_max?: number | null // 请求错误率高于此值变红 upstream_error_rate_percent_max?: number | null // 上游错误率高于此值变红 diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts index 3c6d8f84..7d7776b4 100644 --- a/frontend/src/i18n/locales/en.ts +++ b/frontend/src/i18n/locales/en.ts @@ -1887,7 +1887,7 @@ export default { totalRequests: 'Total Requests', avgQps: 'Avg QPS', avgTps: 'Avg TPS', - avgLatency: 'Avg Latency', + avgLatency: 'Avg Request Duration', avgTtft: 'Avg TTFT', exceptions: 'Exceptions', requestErrors: 'Request Errors', @@ -1899,7 +1899,7 @@ export default { errors: 'Errors', errorRate: 'error_rate:', upstreamRate: 'upstream_rate:', - latencyDuration: 'Latency (duration_ms)', + latencyDuration: 'Request Duration (ms)', ttftLabel: 'TTFT (first_token_ms)', p50: 'p50:', p90: 'p90:', @@ -1919,7 +1919,7 @@ export default { failedToLoadData: 'Failed to load ops data.', failedToLoadOverview: 'Failed to load overview', failedToLoadThroughputTrend: 'Failed to load throughput trend', - failedToLoadLatencyHistogram: 'Failed to load latency histogram', + failedToLoadLatencyHistogram: 'Failed to load request duration histogram', failedToLoadErrorTrend: 'Failed to load error trend', failedToLoadErrorDistribution: 'Failed to load error distribution', failedToLoadErrorDetail: 'Failed to load error detail', @@ -1927,7 +1927,7 @@ export default { tpsK: 'TPS (K)', top: 'Top:', throughputTrend: 'Throughput Trend', - latencyHistogram: 'Latency Histogram', + latencyHistogram: 'Request Duration Histogram', errorTrend: 'Error Trend', errorDistribution: 'Error Distribution', // Health Score & Diagnosis @@ -1973,14 +1973,7 @@ export default { memoryHigh: 'Memory usage elevated ({usage}%)', memoryHighImpact: 'Memory pressure is high, needs attention', memoryHighAction: 'Monitor memory trends, check for memory leaks', - // Latency diagnostics - latencyCritical: 'Response latency critically high ({latency}ms)', - latencyCriticalImpact: 'User experience extremely poor, many requests timing out', - latencyCriticalAction: 'Check slow queries, database indexes, network latency, and upstream services', - latencyHigh: 'Response latency elevated ({latency}ms)', - latencyHighImpact: 'User experience degraded, needs optimization', - latencyHighAction: 'Analyze slow request logs, optimize database queries and business logic', - ttftHigh: 'Time to first byte elevated ({ttft}ms)', + ttftHigh: 'Time to first token elevated ({ttft}ms)', ttftHighImpact: 'User perceived latency increased', ttftHighAction: 'Optimize request processing flow, reduce pre-processing time', // Error rate diagnostics @@ -2020,7 +2013,7 @@ export default { context: 'Context', status: 'Status', message: 'Message', - latency: 'Latency', + latency: 'Request Duration', action: 'Action', noErrors: 'No errors in this window.', grp: 'GRP:', @@ -2049,7 +2042,7 @@ export default { basicInfo: 'Basic Info', platform: 'Platform', model: 'Model', - latency: 'Latency', + latency: 'Request Duration', ttft: 'TTFT', businessLimited: 'Business Limited', requestPath: 'Request Path', @@ -2398,8 +2391,6 @@ export default { metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red', slaMinPercent: 'SLA Minimum Percentage', slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)', - latencyP99MaxMs: 'Latency P99 Maximum (ms)', - latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)', ttftP99MaxMs: 'TTFT P99 Maximum (ms)', ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)', requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)', @@ -2458,7 +2449,7 @@ export default { tooltips: { totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.', throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.', - latencyHistogram: 'Latency distribution (duration_ms) for successful requests.', + latencyHistogram: 'Request duration distribution (ms) for successful requests.', errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).', errorDistribution: 'Error distribution by status code.', goroutines: @@ -2473,7 +2464,7 @@ export default { sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).', errors: 'Error statistics, including total errors, error rate, and upstream error rate.', upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).', - latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.', + latency: 'Request duration statistics, including p50, p90, p95, p99 percentiles.', ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.', health: 'System health score (0-100), considering SLA, error rate, and resource usage.' }, diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts index 57f5e0cc..d955cc1b 100644 --- a/frontend/src/i18n/locales/zh.ts +++ b/frontend/src/i18n/locales/zh.ts @@ -2031,7 +2031,7 @@ export default { totalRequests: '总请求', avgQps: '平均 QPS', avgTps: '平均 TPS', - avgLatency: '平均延迟', + avgLatency: '平均请求时长', avgTtft: '平均首字延迟', exceptions: '异常数', requestErrors: '请求错误', @@ -2043,7 +2043,7 @@ export default { errors: '错误', errorRate: '错误率:', upstreamRate: '上游错误率:', - latencyDuration: '延迟(毫秒)', + latencyDuration: '请求时长(毫秒)', ttftLabel: '首字延迟(毫秒)', p50: 'p50', p90: 'p90', @@ -2063,7 +2063,7 @@ export default { failedToLoadData: '加载运维数据失败', failedToLoadOverview: '加载概览数据失败', failedToLoadThroughputTrend: '加载吞吐趋势失败', - failedToLoadLatencyHistogram: '加载延迟分布失败', + failedToLoadLatencyHistogram: '加载请求时长分布失败', failedToLoadErrorTrend: '加载错误趋势失败', failedToLoadErrorDistribution: '加载错误分布失败', failedToLoadErrorDetail: '加载错误详情失败', @@ -2071,7 +2071,7 @@ export default { tpsK: 'TPS(千)', top: '最高:', throughputTrend: '吞吐趋势', - latencyHistogram: '延迟分布', + latencyHistogram: '请求时长分布', errorTrend: '错误趋势', errorDistribution: '错误分布', // Health Score & Diagnosis @@ -2117,15 +2117,8 @@ export default { memoryHigh: '内存使用率偏高 ({usage}%)', memoryHighImpact: '内存压力较大,需要关注', memoryHighAction: '监控内存趋势,检查是否有内存泄漏', - // Latency diagnostics - latencyCritical: '响应延迟严重过高 ({latency}ms)', - latencyCriticalImpact: '用户体验极差,大量请求超时', - latencyCriticalAction: '检查慢查询、数据库索引、网络延迟和上游服务', - latencyHigh: '响应延迟偏高 ({latency}ms)', - latencyHighImpact: '用户体验下降,需要优化', - latencyHighAction: '分析慢请求日志,优化数据库查询和业务逻辑', ttftHigh: '首字节时间偏高 ({ttft}ms)', - ttftHighImpact: '用户感知延迟增加', + ttftHighImpact: '用户感知时长增加', ttftHighAction: '优化请求处理流程,减少前置逻辑耗时', // Error rate diagnostics upstreamCritical: '上游错误率严重偏高 ({rate}%)', @@ -2143,13 +2136,13 @@ export default { // SLA diagnostics slaCritical: 'SLA 严重低于目标 ({sla}%)', slaCriticalImpact: '用户体验严重受损', - slaCriticalAction: '紧急排查错误和延迟问题,考虑限流保护', + slaCriticalAction: '紧急排查错误原因,必要时采取限流保护', slaLow: 'SLA 低于目标 ({sla}%)', slaLowImpact: '需要关注服务质量', slaLowAction: '分析SLA下降原因,优化系统性能', // Health score diagnostics healthCritical: '综合健康评分过低 ({score})', - healthCriticalImpact: '多个指标可能同时异常,建议优先排查错误与延迟', + healthCriticalImpact: '多个指标可能同时异常,建议优先排查错误与资源使用情况', healthCriticalAction: '全面检查系统状态,优先处理critical级别问题', healthLow: '综合健康评分偏低 ({score})', healthLowImpact: '可能存在轻度波动,建议关注 SLA 与错误率', @@ -2164,7 +2157,7 @@ export default { context: '上下文', status: '状态码', message: '消息', - latency: '延迟', + latency: '请求时长', action: '操作', noErrors: '该窗口内暂无错误。', grp: 'GRP:', @@ -2193,7 +2186,7 @@ export default { basicInfo: '基本信息', platform: '平台', model: '模型', - latency: '延迟', + latency: '请求时长', ttft: 'TTFT', businessLimited: '业务限制', requestPath: '请求路径', @@ -2351,8 +2344,8 @@ export default { successRate: '成功率 (%)', errorRate: '错误率 (%)', upstreamErrorRate: '上游错误率 (%)', - p95: 'P95 延迟 (ms)', - p99: 'P99 延迟 (ms)', + p95: 'P95 请求时长 (ms)', + p99: 'P99 请求时长 (ms)', cpu: 'CPU 使用率 (%)', memory: '内存使用率 (%)', queueDepth: '并发排队深度', @@ -2542,8 +2535,6 @@ export default { metricThresholdsHint: '配置各项指标的告警阈值,超出阈值时将以红色显示', slaMinPercent: 'SLA最低百分比', slaMinPercentHint: 'SLA低于此值时显示为红色(默认:99.5%)', - latencyP99MaxMs: '延迟P99最大值(毫秒)', - latencyP99MaxMsHint: '延迟P99高于此值时显示为红色(默认:2000ms)', ttftP99MaxMs: 'TTFT P99最大值(毫秒)', ttftP99MaxMsHint: 'TTFT P99高于此值时显示为红色(默认:500ms)', requestErrorRateMaxPercent: '请求错误率最大值(%)', @@ -2602,12 +2593,12 @@ export default { tooltips: { totalRequests: '当前时间窗口内的总请求数和Token消耗量。', throughputTrend: '当前窗口内的请求/QPS 与 token/TPS 趋势。', - latencyHistogram: '成功请求的延迟分布(毫秒)。', + latencyHistogram: '成功请求的请求时长分布(毫秒)。', errorTrend: '错误趋势(SLA 口径排除业务限制;上游错误率排除 429/529)。', errorDistribution: '按状态码统计的错误分布。', upstreamErrors: '上游服务返回的错误,包括API提供商的错误响应(排除429/529限流错误)。', goroutines: - 'Go 运行时的协程数量(轻量级线程)。没有绝对“安全值”,建议以历史基线为准。经验参考:<2000 常见;2000-8000 需关注;>8000 且伴随队列/延迟上升时,优先排查阻塞/泄漏。', + 'Go 运行时的协程数量(轻量级线程)。没有绝对"安全值",建议以历史基线为准。经验参考:<2000 常见;2000-8000 需关注;>8000 且伴随队列上升时,优先排查阻塞/泄漏。', cpu: 'CPU 使用率,显示系统处理器的负载情况。', memory: '内存使用率,包括已使用和总可用内存。', db: '数据库连接池状态,包括活跃连接、空闲连接和等待连接数。', @@ -2617,7 +2608,7 @@ export default { tokens: '当前时间窗口内处理的总Token数量。', sla: '服务等级协议达成率,排除业务限制(如余额不足、配额超限)的成功请求占比。', errors: '错误统计,包括总错误数、错误率和上游错误率。', - latency: '请求延迟统计,包括 p50、p90、p95、p99 等百分位数。', + latency: '请求时长统计,包括 p50、p90、p95、p99 等百分位数。', ttft: '首Token延迟(Time To First Token),衡量流式响应的首字节返回速度。', health: '系统健康评分(0-100),综合考虑 SLA、错误率和资源使用情况。' }, diff --git a/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue b/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue index 2cf097c0..627303c0 100644 --- a/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue +++ b/frontend/src/views/admin/ops/components/OpsAlertRulesCard.vue @@ -140,24 +140,6 @@ const metricDefinitions = computed(() => { recommendedThreshold: 1, unit: '%' }, - { - type: 'p95_latency_ms', - group: 'system', - label: t('admin.ops.alertRules.metrics.p95'), - description: t('admin.ops.alertRules.metricDescriptions.p95'), - recommendedOperator: '>', - recommendedThreshold: 1000, - unit: 'ms' - }, - { - type: 'p99_latency_ms', - group: 'system', - label: t('admin.ops.alertRules.metrics.p99'), - description: t('admin.ops.alertRules.metricDescriptions.p99'), - recommendedOperator: '>', - recommendedThreshold: 2000, - unit: 'ms' - }, { type: 'cpu_usage_percent', group: 'system', diff --git a/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue b/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue index 96dc9c8a..f92c6c50 100644 --- a/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue +++ b/frontend/src/views/admin/ops/components/OpsDashboardHeader.vue @@ -169,8 +169,8 @@ const updatedAtLabel = computed(() => { return props.lastUpdated.toLocaleTimeString() }) -// --- Color coding for latency/TTFT --- -function getLatencyColor(ms: number | null | undefined): string { +// --- Color coding for TTFT --- +function getTTFTColor(ms: number | null | undefined): string { if (ms == null) return 'text-gray-900 dark:text-white' if (ms < 500) return 'text-green-600 dark:text-green-400' if (ms < 1000) return 'text-yellow-600 dark:text-yellow-400' @@ -186,13 +186,6 @@ function isSLABelowThreshold(slaPercent: number | null): boolean { return slaPercent < threshold } -function isLatencyAboveThreshold(latencyP99Ms: number | null): boolean { - if (latencyP99Ms == null) return false - const threshold = props.thresholds?.latency_p99_ms_max - if (threshold == null) return false - return latencyP99Ms > threshold -} - function isTTFTAboveThreshold(ttftP99Ms: number | null): boolean { if (ttftP99Ms == null) return false const threshold = props.thresholds?.ttft_p99_ms_max @@ -482,24 +475,6 @@ const diagnosisReport = computed(() => { } } - // Latency diagnostics - const durationP99 = ov.duration?.p99_ms ?? 0 - if (durationP99 > 2000) { - report.push({ - type: 'critical', - message: t('admin.ops.diagnosis.latencyCritical', { latency: durationP99.toFixed(0) }), - impact: t('admin.ops.diagnosis.latencyCriticalImpact'), - action: t('admin.ops.diagnosis.latencyCriticalAction') - }) - } else if (durationP99 > 1000) { - report.push({ - type: 'warning', - message: t('admin.ops.diagnosis.latencyHigh', { latency: durationP99.toFixed(0) }), - impact: t('admin.ops.diagnosis.latencyHighImpact'), - action: t('admin.ops.diagnosis.latencyHighAction') - }) - } - const ttftP99 = ov.ttft?.p99_ms ?? 0 if (ttftP99 > 500) { report.push({ @@ -1181,7 +1156,7 @@ function handleToolbarRefresh() {
-
+
{{ t('admin.ops.requestsTitle') }} @@ -1217,7 +1192,7 @@ function handleToolbarRefresh() {
-
+
SLA @@ -1247,8 +1222,8 @@ function handleToolbarRefresh() {
- -
+ +
{{ t('admin.ops.latencyDuration') }} @@ -1264,7 +1239,7 @@ function handleToolbarRefresh() {
-
+
{{ durationP99Ms ?? '-' }}
ms (P99) @@ -1272,34 +1247,34 @@ function handleToolbarRefresh() {
P95: - {{ durationP95Ms ?? '-' }} + {{ durationP95Ms ?? '-' }} ms
P90: - {{ durationP90Ms ?? '-' }} + {{ durationP90Ms ?? '-' }} ms
P50: - {{ durationP50Ms ?? '-' }} + {{ durationP50Ms ?? '-' }} ms
Avg: - {{ durationAvgMs ?? '-' }} + {{ durationAvgMs ?? '-' }} ms
Max: - {{ durationMaxMs ?? '-' }} + {{ durationMaxMs ?? '-' }} ms
- -
+ +
TTFT @@ -1315,7 +1290,7 @@ function handleToolbarRefresh() {
-
+
{{ ttftP99Ms ?? '-' }}
ms (P99) @@ -1323,34 +1298,34 @@ function handleToolbarRefresh() {
P95: - {{ ttftP95Ms ?? '-' }} + {{ ttftP95Ms ?? '-' }} ms
P90: - {{ ttftP90Ms ?? '-' }} + {{ ttftP90Ms ?? '-' }} ms
P50: - {{ ttftP50Ms ?? '-' }} + {{ ttftP50Ms ?? '-' }} ms
Avg: - {{ ttftAvgMs ?? '-' }} + {{ ttftAvgMs ?? '-' }} ms
Max: - {{ ttftMaxMs ?? '-' }} + {{ ttftMaxMs ?? '-' }} ms
- -
+ +
{{ t('admin.ops.requestErrors') }} @@ -1376,7 +1351,7 @@ function handleToolbarRefresh() {
-
+
{{ t('admin.ops.upstreamErrors') }} diff --git a/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue b/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue index 244b3e13..1d9859d4 100644 --- a/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue +++ b/frontend/src/views/admin/ops/components/OpsErrorDetailsModal.vue @@ -205,12 +205,13 @@ watch(
-
-
-
-
+
+ +
+
+
-
- +
-
- +
-
- +
-
- +
-
- -
+
+ +
-
- +
+ +
diff --git a/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue b/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue index 1dcab4b3..d64ae390 100644 --- a/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue +++ b/frontend/src/views/admin/ops/components/OpsRuntimeSettingsCard.vue @@ -53,11 +53,6 @@ function validateRuntimeSettings(settings: OpsAlertRuntimeSettings): ValidationR errors.push('SLA 最低值必须在 0-100 之间') } } - if (thresholds.latency_p99_ms_max != null) { - if (!Number.isFinite(thresholds.latency_p99_ms_max) || thresholds.latency_p99_ms_max < 0) { - errors.push('延迟 P99 最大值必须大于或等于 0') - } - } if (thresholds.ttft_p99_ms_max != null) { if (!Number.isFinite(thresholds.ttft_p99_ms_max) || thresholds.ttft_p99_ms_max < 0) { errors.push('TTFT P99 最大值必须大于或等于 0') @@ -163,7 +158,6 @@ function openAlertEditor() { if (!draftAlert.value.thresholds) { draftAlert.value.thresholds = { sla_percent_min: 99.5, - latency_p99_ms_max: 2000, ttft_p99_ms_max: 500, request_error_rate_percent_max: 5, upstream_error_rate_percent_max: 5 @@ -353,18 +347,7 @@ onMounted(() => {

SLA 低于此值时将显示为红色

-
-
延迟 P99 最大值 (ms)
- -

延迟 P99 高于此值时将显示为红色

-
+
TTFT P99 最大值 (ms)
diff --git a/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue b/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue index 1f64f253..c8291313 100644 --- a/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue +++ b/frontend/src/views/admin/ops/components/OpsSettingsDialog.vue @@ -32,7 +32,6 @@ const advancedSettings = ref(null) // 指标阈值配置 const metricThresholds = ref({ sla_percent_min: 99.5, - latency_p99_ms_max: 2000, ttft_p99_ms_max: 500, request_error_rate_percent_max: 5, upstream_error_rate_percent_max: 5 @@ -53,13 +52,12 @@ async function loadAllSettings() { advancedSettings.value = advanced // 如果后端返回了阈值,使用后端的值;否则保持默认值 if (thresholds && Object.keys(thresholds).length > 0) { - metricThresholds.value = { - sla_percent_min: thresholds.sla_percent_min ?? 99.5, - latency_p99_ms_max: thresholds.latency_p99_ms_max ?? 2000, - ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500, - request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5, - upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5 - } + metricThresholds.value = { + sla_percent_min: thresholds.sla_percent_min ?? 99.5, + ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500, + request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5, + upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5 + } } } catch (err: any) { console.error('[OpsSettingsDialog] Failed to load settings', err) @@ -161,9 +159,6 @@ const validation = computed(() => { if (metricThresholds.value.sla_percent_min != null && (metricThresholds.value.sla_percent_min < 0 || metricThresholds.value.sla_percent_min > 100)) { errors.push('SLA最低百分比必须在0-100之间') } - if (metricThresholds.value.latency_p99_ms_max != null && metricThresholds.value.latency_p99_ms_max < 0) { - errors.push('延迟P99最大值必须大于等于0') - } if (metricThresholds.value.ttft_p99_ms_max != null && metricThresholds.value.ttft_p99_ms_max < 0) { errors.push('TTFT P99最大值必须大于等于0') } @@ -362,17 +357,6 @@ async function saveAllSettings() {

{{ t('admin.ops.settings.slaMinPercentHint') }}

-
- - -

{{ t('admin.ops.settings.latencyP99MaxMsHint') }}

-