refactor(ops): 移除duration相关告警指标,简化监控配置

主要改动:
- 移除 p95_latency_ms 和 p99_latency_ms 告警指标类型
- 移除配置中的 latency_p99_ms_max 阈值设置
- 简化健康分数计算(移除latency权重,重新归一化SLA和错误率)
- 移除duration相关的诊断规则和阈值检查
- 统一术语:延迟 → 请求时长
- 保留duration数据展示,但不再用于告警判断
- 聚焦TTFT作为主要的响应速度告警指标

影响范围:
- Backend: handler, service, models, tests
- Frontend: API types, i18n, components
This commit is contained in:
IanShaw027
2026-01-14 10:52:56 +08:00
parent 33f58d583d
commit 182683814b
14 changed files with 92 additions and 227 deletions

View File

@@ -20,8 +20,6 @@ var validOpsAlertMetricTypes = []string{
"success_rate",
"error_rate",
"upstream_error_rate",
"p95_latency_ms",
"p99_latency_ms",
"cpu_usage_percent",
"memory_usage_percent",
"concurrency_queue_depth",

View File

@@ -523,16 +523,6 @@ func (s *OpsAlertEvaluatorService) computeRuleMetric(
return 0, false
}
return overview.UpstreamErrorRate * 100, true
case "p95_latency_ms":
if overview.Duration.P95 == nil {
return 0, false
}
return float64(*overview.Duration.P95), true
case "p99_latency_ms":
if overview.Duration.P99 == nil {
return 0, false
}
return float64(*overview.Duration.P99), true
default:
return 0, false
}

View File

@@ -32,7 +32,7 @@ func computeDashboardHealthScore(now time.Time, overview *OpsDashboardOverview)
}
// computeBusinessHealth calculates business health score (0-100)
// Components: SLA (50%) + Error Rate (30%) + Latency (20%)
// Components: SLA (50%) + Error Rate (30%)
func computeBusinessHealth(overview *OpsDashboardOverview) float64 {
// SLA score: 99.5% → 100, 95% → 0 (linear)
slaScore := 100.0
@@ -59,22 +59,9 @@ func computeBusinessHealth(overview *OpsDashboardOverview) float64 {
}
}
// Latency score: 1s → 100, 10s → 0 (linear)
// Uses P99 of duration (TTFT is less critical for overall health)
latencyScore := 100.0
if overview.Duration.P99 != nil {
p99 := float64(*overview.Duration.P99)
if p99 > 1000 {
if p99 <= 10000 {
latencyScore = (10000 - p99) / 9000 * 100
} else {
latencyScore = 0
}
}
}
// Weighted combination
return slaScore*0.5 + errorScore*0.3 + latencyScore*0.2
// Weighted combination (renormalized after removing duration)
const weightSum = 0.8
return (slaScore*0.5 + errorScore*0.3) / weightSum
}
// computeInfraHealth calculates infrastructure health score (0-100)

View File

@@ -291,17 +291,6 @@ func TestComputeBusinessHealth(t *testing.T) {
wantMin: 95,
wantMax: 100,
},
{
name: "latency boundary 1000ms",
overview: &OpsDashboardOverview{
SLA: 0.995,
ErrorRate: 0,
UpstreamErrorRate: 0,
Duration: OpsPercentiles{P99: intPtr(1000)},
},
wantMin: 95,
wantMax: 100,
},
{
name: "upstream error dominates",
overview: &OpsDashboardOverview{

View File

@@ -482,13 +482,11 @@ const SettingKeyOpsMetricThresholds = "ops_metric_thresholds"
func defaultOpsMetricThresholds() *OpsMetricThresholds {
slaMin := 99.5
latencyMax := 2000.0
ttftMax := 500.0
reqErrMax := 5.0
upstreamErrMax := 5.0
return &OpsMetricThresholds{
SLAPercentMin: &slaMin,
LatencyP99MsMax: &latencyMax,
TTFTp99MsMax: &ttftMax,
RequestErrorRatePercentMax: &reqErrMax,
UpstreamErrorRatePercentMax: &upstreamErrMax,
@@ -538,9 +536,6 @@ func (s *OpsService) UpdateMetricThresholds(ctx context.Context, cfg *OpsMetricT
if cfg.SLAPercentMin != nil && (*cfg.SLAPercentMin < 0 || *cfg.SLAPercentMin > 100) {
return nil, errors.New("sla_percent_min must be between 0 and 100")
}
if cfg.LatencyP99MsMax != nil && *cfg.LatencyP99MsMax < 0 {
return nil, errors.New("latency_p99_ms_max must be >= 0")
}
if cfg.TTFTp99MsMax != nil && *cfg.TTFTp99MsMax < 0 {
return nil, errors.New("ttft_p99_ms_max must be >= 0")
}

View File

@@ -63,7 +63,6 @@ type OpsAlertSilencingSettings struct {
type OpsMetricThresholds struct {
SLAPercentMin *float64 `json:"sla_percent_min,omitempty"` // SLA低于此值变红
LatencyP99MsMax *float64 `json:"latency_p99_ms_max,omitempty"` // 延迟P99高于此值变红
TTFTp99MsMax *float64 `json:"ttft_p99_ms_max,omitempty"` // TTFT P99高于此值变红
RequestErrorRatePercentMax *float64 `json:"request_error_rate_percent_max,omitempty"` // 请求错误率高于此值变红
UpstreamErrorRatePercentMax *float64 `json:"upstream_error_rate_percent_max,omitempty"` // 上游错误率高于此值变红

View File

@@ -653,8 +653,6 @@ export type MetricType =
| 'success_rate'
| 'error_rate'
| 'upstream_error_rate'
| 'p95_latency_ms'
| 'p99_latency_ms'
| 'cpu_usage_percent'
| 'memory_usage_percent'
| 'concurrency_queue_depth'
@@ -729,7 +727,6 @@ export interface EmailNotificationConfig {
export interface OpsMetricThresholds {
sla_percent_min?: number | null // SLA低于此值变红
latency_p99_ms_max?: number | null // 延迟 P99 高于此值变红
ttft_p99_ms_max?: number | null // TTFT P99高于此值变红
request_error_rate_percent_max?: number | null // 请求错误率高于此值变红
upstream_error_rate_percent_max?: number | null // 上游错误率高于此值变红

View File

@@ -1887,7 +1887,7 @@ export default {
totalRequests: 'Total Requests',
avgQps: 'Avg QPS',
avgTps: 'Avg TPS',
avgLatency: 'Avg Latency',
avgLatency: 'Avg Request Duration',
avgTtft: 'Avg TTFT',
exceptions: 'Exceptions',
requestErrors: 'Request Errors',
@@ -1899,7 +1899,7 @@ export default {
errors: 'Errors',
errorRate: 'error_rate:',
upstreamRate: 'upstream_rate:',
latencyDuration: 'Latency (duration_ms)',
latencyDuration: 'Request Duration (ms)',
ttftLabel: 'TTFT (first_token_ms)',
p50: 'p50:',
p90: 'p90:',
@@ -1919,7 +1919,7 @@ export default {
failedToLoadData: 'Failed to load ops data.',
failedToLoadOverview: 'Failed to load overview',
failedToLoadThroughputTrend: 'Failed to load throughput trend',
failedToLoadLatencyHistogram: 'Failed to load latency histogram',
failedToLoadLatencyHistogram: 'Failed to load request duration histogram',
failedToLoadErrorTrend: 'Failed to load error trend',
failedToLoadErrorDistribution: 'Failed to load error distribution',
failedToLoadErrorDetail: 'Failed to load error detail',
@@ -1927,7 +1927,7 @@ export default {
tpsK: 'TPS (K)',
top: 'Top:',
throughputTrend: 'Throughput Trend',
latencyHistogram: 'Latency Histogram',
latencyHistogram: 'Request Duration Histogram',
errorTrend: 'Error Trend',
errorDistribution: 'Error Distribution',
// Health Score & Diagnosis
@@ -1973,14 +1973,7 @@ export default {
memoryHigh: 'Memory usage elevated ({usage}%)',
memoryHighImpact: 'Memory pressure is high, needs attention',
memoryHighAction: 'Monitor memory trends, check for memory leaks',
// Latency diagnostics
latencyCritical: 'Response latency critically high ({latency}ms)',
latencyCriticalImpact: 'User experience extremely poor, many requests timing out',
latencyCriticalAction: 'Check slow queries, database indexes, network latency, and upstream services',
latencyHigh: 'Response latency elevated ({latency}ms)',
latencyHighImpact: 'User experience degraded, needs optimization',
latencyHighAction: 'Analyze slow request logs, optimize database queries and business logic',
ttftHigh: 'Time to first byte elevated ({ttft}ms)',
ttftHigh: 'Time to first token elevated ({ttft}ms)',
ttftHighImpact: 'User perceived latency increased',
ttftHighAction: 'Optimize request processing flow, reduce pre-processing time',
// Error rate diagnostics
@@ -2020,7 +2013,7 @@ export default {
context: 'Context',
status: 'Status',
message: 'Message',
latency: 'Latency',
latency: 'Request Duration',
action: 'Action',
noErrors: 'No errors in this window.',
grp: 'GRP:',
@@ -2049,7 +2042,7 @@ export default {
basicInfo: 'Basic Info',
platform: 'Platform',
model: 'Model',
latency: 'Latency',
latency: 'Request Duration',
ttft: 'TTFT',
businessLimited: 'Business Limited',
requestPath: 'Request Path',
@@ -2398,8 +2391,6 @@ export default {
metricThresholdsHint: 'Configure alert thresholds for metrics, values exceeding thresholds will be displayed in red',
slaMinPercent: 'SLA Minimum Percentage',
slaMinPercentHint: 'SLA below this value will be displayed in red (default: 99.5%)',
latencyP99MaxMs: 'Latency P99 Maximum (ms)',
latencyP99MaxMsHint: 'Latency P99 above this value will be displayed in red (default: 2000ms)',
ttftP99MaxMs: 'TTFT P99 Maximum (ms)',
ttftP99MaxMsHint: 'TTFT P99 above this value will be displayed in red (default: 500ms)',
requestErrorRateMaxPercent: 'Request Error Rate Maximum (%)',
@@ -2458,7 +2449,7 @@ export default {
tooltips: {
totalRequests: 'Total number of requests (including both successful and failed requests) in the selected time window.',
throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
latencyHistogram: 'Request duration distribution (ms) for successful requests.',
errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
errorDistribution: 'Error distribution by status code.',
goroutines:
@@ -2473,7 +2464,7 @@ export default {
sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
latency: 'Request duration statistics, including p50, p90, p95, p99 percentiles.',
ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
},

View File

@@ -2031,7 +2031,7 @@ export default {
totalRequests: '总请求',
avgQps: '平均 QPS',
avgTps: '平均 TPS',
avgLatency: '平均延迟',
avgLatency: '平均请求时长',
avgTtft: '平均首字延迟',
exceptions: '异常数',
requestErrors: '请求错误',
@@ -2043,7 +2043,7 @@ export default {
errors: '错误',
errorRate: '错误率:',
upstreamRate: '上游错误率:',
latencyDuration: '延迟(毫秒)',
latencyDuration: '请求时长(毫秒)',
ttftLabel: '首字延迟(毫秒)',
p50: 'p50',
p90: 'p90',
@@ -2063,7 +2063,7 @@ export default {
failedToLoadData: '加载运维数据失败',
failedToLoadOverview: '加载概览数据失败',
failedToLoadThroughputTrend: '加载吞吐趋势失败',
failedToLoadLatencyHistogram: '加载延迟分布失败',
failedToLoadLatencyHistogram: '加载请求时长分布失败',
failedToLoadErrorTrend: '加载错误趋势失败',
failedToLoadErrorDistribution: '加载错误分布失败',
failedToLoadErrorDetail: '加载错误详情失败',
@@ -2071,7 +2071,7 @@ export default {
tpsK: 'TPS',
top: '最高:',
throughputTrend: '吞吐趋势',
latencyHistogram: '延迟分布',
latencyHistogram: '请求时长分布',
errorTrend: '错误趋势',
errorDistribution: '错误分布',
// Health Score & Diagnosis
@@ -2117,15 +2117,8 @@ export default {
memoryHigh: '内存使用率偏高 ({usage}%)',
memoryHighImpact: '内存压力较大,需要关注',
memoryHighAction: '监控内存趋势,检查是否有内存泄漏',
// Latency diagnostics
latencyCritical: '响应延迟严重过高 ({latency}ms)',
latencyCriticalImpact: '用户体验极差,大量请求超时',
latencyCriticalAction: '检查慢查询、数据库索引、网络延迟和上游服务',
latencyHigh: '响应延迟偏高 ({latency}ms)',
latencyHighImpact: '用户体验下降,需要优化',
latencyHighAction: '分析慢请求日志,优化数据库查询和业务逻辑',
ttftHigh: '首字节时间偏高 ({ttft}ms)',
ttftHighImpact: '用户感知延迟增加',
ttftHighImpact: '用户感知时长增加',
ttftHighAction: '优化请求处理流程,减少前置逻辑耗时',
// Error rate diagnostics
upstreamCritical: '上游错误率严重偏高 ({rate}%)',
@@ -2143,13 +2136,13 @@ export default {
// SLA diagnostics
slaCritical: 'SLA 严重低于目标 ({sla}%)',
slaCriticalImpact: '用户体验严重受损',
slaCriticalAction: '紧急排查错误和延迟问题,考虑限流保护',
slaCriticalAction: '紧急排查错误原因,必要时采取限流保护',
slaLow: 'SLA 低于目标 ({sla}%)',
slaLowImpact: '需要关注服务质量',
slaLowAction: '分析SLA下降原因优化系统性能',
// Health score diagnostics
healthCritical: '综合健康评分过低 ({score})',
healthCriticalImpact: '多个指标可能同时异常,建议优先排查错误与延迟',
healthCriticalImpact: '多个指标可能同时异常,建议优先排查错误与资源使用情况',
healthCriticalAction: '全面检查系统状态优先处理critical级别问题',
healthLow: '综合健康评分偏低 ({score})',
healthLowImpact: '可能存在轻度波动,建议关注 SLA 与错误率',
@@ -2164,7 +2157,7 @@ export default {
context: '上下文',
status: '状态码',
message: '消息',
latency: '延迟',
latency: '请求时长',
action: '操作',
noErrors: '该窗口内暂无错误。',
grp: 'GRP',
@@ -2193,7 +2186,7 @@ export default {
basicInfo: '基本信息',
platform: '平台',
model: '模型',
latency: '延迟',
latency: '请求时长',
ttft: 'TTFT',
businessLimited: '业务限制',
requestPath: '请求路径',
@@ -2351,8 +2344,8 @@ export default {
successRate: '成功率 (%)',
errorRate: '错误率 (%)',
upstreamErrorRate: '上游错误率 (%)',
p95: 'P95 延迟 (ms)',
p99: 'P99 延迟 (ms)',
p95: 'P95 请求时长 (ms)',
p99: 'P99 请求时长 (ms)',
cpu: 'CPU 使用率 (%)',
memory: '内存使用率 (%)',
queueDepth: '并发排队深度',
@@ -2542,8 +2535,6 @@ export default {
metricThresholdsHint: '配置各项指标的告警阈值,超出阈值时将以红色显示',
slaMinPercent: 'SLA最低百分比',
slaMinPercentHint: 'SLA低于此值时显示为红色默认99.5%',
latencyP99MaxMs: '延迟P99最大值毫秒',
latencyP99MaxMsHint: '延迟P99高于此值时显示为红色默认2000ms',
ttftP99MaxMs: 'TTFT P99最大值毫秒',
ttftP99MaxMsHint: 'TTFT P99高于此值时显示为红色默认500ms',
requestErrorRateMaxPercent: '请求错误率最大值(%',
@@ -2602,12 +2593,12 @@ export default {
tooltips: {
totalRequests: '当前时间窗口内的总请求数和Token消耗量。',
throughputTrend: '当前窗口内的请求/QPS 与 token/TPS 趋势。',
latencyHistogram: '成功请求的延迟分布(毫秒)。',
latencyHistogram: '成功请求的请求时长分布(毫秒)。',
errorTrend: '错误趋势SLA 口径排除业务限制;上游错误率排除 429/529。',
errorDistribution: '按状态码统计的错误分布。',
upstreamErrors: '上游服务返回的错误包括API提供商的错误响应排除429/529限流错误。',
goroutines:
'Go 运行时的协程数量(轻量级线程)。没有绝对安全值,建议以历史基线为准。经验参考:<2000 常见2000-8000 需关注;>8000 且伴随队列/延迟上升时,优先排查阻塞/泄漏。',
'Go 运行时的协程数量(轻量级线程)。没有绝对"安全值",建议以历史基线为准。经验参考:<2000 常见2000-8000 需关注;>8000 且伴随队列上升时,优先排查阻塞/泄漏。',
cpu: 'CPU 使用率,显示系统处理器的负载情况。',
memory: '内存使用率,包括已使用和总可用内存。',
db: '数据库连接池状态,包括活跃连接、空闲连接和等待连接数。',
@@ -2617,7 +2608,7 @@ export default {
tokens: '当前时间窗口内处理的总Token数量。',
sla: '服务等级协议达成率,排除业务限制(如余额不足、配额超限)的成功请求占比。',
errors: '错误统计,包括总错误数、错误率和上游错误率。',
latency: '请求延迟统计,包括 p50、p90、p95、p99 等百分位数。',
latency: '请求时长统计,包括 p50、p90、p95、p99 等百分位数。',
ttft: '首Token延迟Time To First Token衡量流式响应的首字节返回速度。',
health: '系统健康评分0-100综合考虑 SLA、错误率和资源使用情况。'
},

View File

@@ -140,24 +140,6 @@ const metricDefinitions = computed(() => {
recommendedThreshold: 1,
unit: '%'
},
{
type: 'p95_latency_ms',
group: 'system',
label: t('admin.ops.alertRules.metrics.p95'),
description: t('admin.ops.alertRules.metricDescriptions.p95'),
recommendedOperator: '>',
recommendedThreshold: 1000,
unit: 'ms'
},
{
type: 'p99_latency_ms',
group: 'system',
label: t('admin.ops.alertRules.metrics.p99'),
description: t('admin.ops.alertRules.metricDescriptions.p99'),
recommendedOperator: '>',
recommendedThreshold: 2000,
unit: 'ms'
},
{
type: 'cpu_usage_percent',
group: 'system',

View File

@@ -169,8 +169,8 @@ const updatedAtLabel = computed(() => {
return props.lastUpdated.toLocaleTimeString()
})
// --- Color coding for latency/TTFT ---
function getLatencyColor(ms: number | null | undefined): string {
// --- Color coding for TTFT ---
function getTTFTColor(ms: number | null | undefined): string {
if (ms == null) return 'text-gray-900 dark:text-white'
if (ms < 500) return 'text-green-600 dark:text-green-400'
if (ms < 1000) return 'text-yellow-600 dark:text-yellow-400'
@@ -186,13 +186,6 @@ function isSLABelowThreshold(slaPercent: number | null): boolean {
return slaPercent < threshold
}
function isLatencyAboveThreshold(latencyP99Ms: number | null): boolean {
if (latencyP99Ms == null) return false
const threshold = props.thresholds?.latency_p99_ms_max
if (threshold == null) return false
return latencyP99Ms > threshold
}
function isTTFTAboveThreshold(ttftP99Ms: number | null): boolean {
if (ttftP99Ms == null) return false
const threshold = props.thresholds?.ttft_p99_ms_max
@@ -482,24 +475,6 @@ const diagnosisReport = computed<DiagnosisItem[]>(() => {
}
}
// Latency diagnostics
const durationP99 = ov.duration?.p99_ms ?? 0
if (durationP99 > 2000) {
report.push({
type: 'critical',
message: t('admin.ops.diagnosis.latencyCritical', { latency: durationP99.toFixed(0) }),
impact: t('admin.ops.diagnosis.latencyCriticalImpact'),
action: t('admin.ops.diagnosis.latencyCriticalAction')
})
} else if (durationP99 > 1000) {
report.push({
type: 'warning',
message: t('admin.ops.diagnosis.latencyHigh', { latency: durationP99.toFixed(0) }),
impact: t('admin.ops.diagnosis.latencyHighImpact'),
action: t('admin.ops.diagnosis.latencyHighAction')
})
}
const ttftP99 = ov.ttft?.p99_ms ?? 0
if (ttftP99 > 500) {
report.push({
@@ -1181,7 +1156,7 @@ function handleToolbarRefresh() {
<!-- Right: 6 cards (3 cols x 2 rows) -->
<div class="grid h-full grid-cols-1 content-center gap-4 sm:grid-cols-2 lg:col-span-7 lg:grid-cols-3">
<!-- Card 1: Requests -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 1;">
<div class="flex items-center justify-between">
<div class="flex items-center gap-1">
<span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requestsTitle') }}</span>
@@ -1217,7 +1192,7 @@ function handleToolbarRefresh() {
</div>
<!-- Card 2: SLA -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 2;">
<div class="flex items-center justify-between">
<div class="flex items-center gap-2">
<span class="text-[10px] font-bold uppercase text-gray-400">SLA</span>
@@ -1247,8 +1222,8 @@ function handleToolbarRefresh() {
</div>
</div>
<!-- Card 3: Latency (Duration) -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
<!-- Card 4: Request Duration -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 4;">
<div class="flex items-center justify-between">
<div class="flex items-center gap-1">
<span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.latencyDuration') }}</span>
@@ -1264,7 +1239,7 @@ function handleToolbarRefresh() {
</button>
</div>
<div class="mt-2 flex items-baseline gap-2">
<div class="text-3xl font-black" :class="isLatencyAboveThreshold(durationP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(durationP99Ms)">
<div class="text-3xl font-black text-gray-900 dark:text-white">
{{ durationP99Ms ?? '-' }}
</div>
<span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1272,34 +1247,34 @@ function handleToolbarRefresh() {
<div class="mt-3 flex flex-wrap gap-x-3 gap-y-1 text-xs">
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">P95:</span>
<span class="font-bold" :class="getLatencyColor(durationP95Ms)">{{ durationP95Ms ?? '-' }}</span>
<span class="font-bold text-gray-900 dark:text-white">{{ durationP95Ms ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">P90:</span>
<span class="font-bold" :class="getLatencyColor(durationP90Ms)">{{ durationP90Ms ?? '-' }}</span>
<span class="font-bold text-gray-900 dark:text-white">{{ durationP90Ms ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">P50:</span>
<span class="font-bold" :class="getLatencyColor(durationP50Ms)">{{ durationP50Ms ?? '-' }}</span>
<span class="font-bold text-gray-900 dark:text-white">{{ durationP50Ms ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">Avg:</span>
<span class="font-bold" :class="getLatencyColor(durationAvgMs)">{{ durationAvgMs ?? '-' }}</span>
<span class="font-bold text-gray-900 dark:text-white">{{ durationAvgMs ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">Max:</span>
<span class="font-bold" :class="getLatencyColor(durationMaxMs)">{{ durationMaxMs ?? '-' }}</span>
<span class="font-bold text-gray-900 dark:text-white">{{ durationMaxMs ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
</div>
</div>
<!-- Card 4: TTFT -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
<!-- Card 5: TTFT -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 5;">
<div class="flex items-center justify-between">
<div class="flex items-center gap-1">
<span class="text-[10px] font-bold uppercase text-gray-400">TTFT</span>
@@ -1315,7 +1290,7 @@ function handleToolbarRefresh() {
</button>
</div>
<div class="mt-2 flex items-baseline gap-2">
<div class="text-3xl font-black" :class="isTTFTAboveThreshold(ttftP99Ms) ? 'text-red-600 dark:text-red-400' : getLatencyColor(ttftP99Ms)">
<div class="text-3xl font-black" :class="isTTFTAboveThreshold(ttftP99Ms) ? 'text-red-600 dark:text-red-400' : getTTFTColor(ttftP99Ms)">
{{ ttftP99Ms ?? '-' }}
</div>
<span class="text-xs font-bold text-gray-400">ms (P99)</span>
@@ -1323,34 +1298,34 @@ function handleToolbarRefresh() {
<div class="mt-3 flex flex-wrap gap-x-3 gap-y-1 text-xs">
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">P95:</span>
<span class="font-bold" :class="getLatencyColor(ttftP95Ms)">{{ ttftP95Ms ?? '-' }}</span>
<span class="font-bold" :class="getTTFTColor(ttftP95Ms)">{{ ttftP95Ms ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">P90:</span>
<span class="font-bold" :class="getLatencyColor(ttftP90Ms)">{{ ttftP90Ms ?? '-' }}</span>
<span class="font-bold" :class="getTTFTColor(ttftP90Ms)">{{ ttftP90Ms ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">P50:</span>
<span class="font-bold" :class="getLatencyColor(ttftP50Ms)">{{ ttftP50Ms ?? '-' }}</span>
<span class="font-bold" :class="getTTFTColor(ttftP50Ms)">{{ ttftP50Ms ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">Avg:</span>
<span class="font-bold" :class="getLatencyColor(ttftAvgMs)">{{ ttftAvgMs ?? '-' }}</span>
<span class="font-bold" :class="getTTFTColor(ttftAvgMs)">{{ ttftAvgMs ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
<span class="text-gray-500">Max:</span>
<span class="font-bold" :class="getLatencyColor(ttftMaxMs)">{{ ttftMaxMs ?? '-' }}</span>
<span class="font-bold" :class="getTTFTColor(ttftMaxMs)">{{ ttftMaxMs ?? '-' }}</span>
<span class="text-gray-400">ms</span>
</div>
</div>
</div>
<!-- Card 5: Request Errors -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
<!-- Card 3: Request Errors -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 3;">
<div class="flex items-center justify-between">
<div class="flex items-center gap-1">
<span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.requestErrors') }}</span>
@@ -1376,7 +1351,7 @@ function handleToolbarRefresh() {
</div>
<!-- Card 6: Upstream Errors -->
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900">
<div class="rounded-2xl bg-gray-50 p-4 dark:bg-dark-900" style="order: 6;">
<div class="flex items-center justify-between">
<div class="flex items-center gap-1">
<span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.upstreamErrors') }}</span>

View File

@@ -205,12 +205,13 @@ watch(
<div class="flex h-full min-h-0 flex-col">
<!-- Filters -->
<div class="mb-4 flex-shrink-0 border-b border-gray-200 pb-4 dark:border-dark-700">
<div class="grid grid-cols-1 gap-4 lg:grid-cols-14">
<div class="lg:col-span-4">
<div class="relative group">
<div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3.5">
<div class="flex flex-col gap-2">
<!-- 第一行: 搜索框 -->
<div class="flex items-center gap-2">
<div class="relative flex-1 group">
<div class="pointer-events-none absolute inset-y-0 left-0 flex items-center pl-3">
<svg
class="h-4 w-4 text-gray-400 transition-colors group-focus-within:text-blue-500"
class="h-3.5 w-3.5 text-gray-400 transition-colors group-focus-within:text-blue-500"
fill="none"
viewBox="0 0 24 24"
stroke="currentColor"
@@ -221,42 +222,45 @@ watch(
<input
v-model="q"
type="text"
class="w-full rounded-2xl border-gray-200 bg-gray-50/50 py-2 pl-10 pr-4 text-sm font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-4 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
class="w-full rounded-lg border-gray-200 bg-gray-50/50 py-1.5 pl-9 pr-3 text-xs font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-2 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
:placeholder="t('admin.ops.errorDetails.searchPlaceholder')"
/>
</div>
</div>
<div class="lg:col-span-2">
<Select :model-value="statusCode" :options="statusCodeSelectOptions" class="w-full" @update:model-value="statusCode = $event as any" />
</div>
<!-- 第二行: 筛选选项 -->
<div class="grid grid-cols-6 gap-2">
<div class="col-span-1">
<Select :model-value="statusCode" :options="statusCodeSelectOptions" size="sm" @update:model-value="statusCode = $event as any" />
</div>
<div class="lg:col-span-2">
<Select :model-value="phase" :options="phaseSelectOptions" class="w-full" @update:model-value="phase = String($event ?? '')" />
</div>
<div class="col-span-1">
<Select :model-value="phase" :options="phaseSelectOptions" size="sm" @update:model-value="phase = String($event ?? '')" />
</div>
<div class="lg:col-span-2">
<Select :model-value="errorOwner" :options="ownerSelectOptions" class="w-full" @update:model-value="errorOwner = String($event ?? '')" />
</div>
<div class="col-span-1">
<Select :model-value="errorOwner" :options="ownerSelectOptions" size="sm" @update:model-value="errorOwner = String($event ?? '')" />
</div>
<div class="lg:col-span-2">
<Select :model-value="resolvedStatus" :options="resolvedSelectOptions" class="w-full" @update:model-value="resolvedStatus = String($event ?? 'unresolved')" />
</div>
<div class="col-span-1">
<Select :model-value="resolvedStatus" :options="resolvedSelectOptions" size="sm" @update:model-value="resolvedStatus = String($event ?? 'unresolved')" />
</div>
<div class="lg:col-span-1">
<input
v-model="accountIdInput"
type="text"
inputmode="numeric"
class="input w-full text-sm"
:placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
/>
</div>
<div class="col-span-1">
<input
v-model="accountIdInput"
type="text"
inputmode="numeric"
class="w-full rounded-lg border-gray-200 bg-gray-50/50 py-1.5 px-3 text-xs font-medium text-gray-700 transition-all focus:border-blue-500 focus:bg-white focus:ring-2 focus:ring-blue-500/10 dark:border-dark-700 dark:bg-dark-900 dark:text-gray-300 dark:focus:bg-dark-800"
:placeholder="t('admin.ops.errorDetails.accountIdPlaceholder')"
/>
</div>
<div class="lg:col-span-1 flex items-center justify-end">
<button type="button" class="btn btn-secondary btn-sm" @click="resetFilters">
{{ t('common.reset') }}
</button>
<div class="col-span-1 flex items-center justify-end">
<button type="button" class="rounded-lg bg-gray-100 px-3 py-1.5 text-xs font-semibold text-gray-700 transition-colors hover:bg-gray-200 dark:bg-dark-700 dark:text-gray-300 dark:hover:bg-dark-600" @click="resetFilters">
{{ t('common.reset') }}
</button>
</div>
</div>
</div>
</div>

View File

@@ -53,11 +53,6 @@ function validateRuntimeSettings(settings: OpsAlertRuntimeSettings): ValidationR
errors.push('SLA 最低值必须在 0-100 之间')
}
}
if (thresholds.latency_p99_ms_max != null) {
if (!Number.isFinite(thresholds.latency_p99_ms_max) || thresholds.latency_p99_ms_max < 0) {
errors.push('延迟 P99 最大值必须大于或等于 0')
}
}
if (thresholds.ttft_p99_ms_max != null) {
if (!Number.isFinite(thresholds.ttft_p99_ms_max) || thresholds.ttft_p99_ms_max < 0) {
errors.push('TTFT P99 最大值必须大于或等于 0')
@@ -163,7 +158,6 @@ function openAlertEditor() {
if (!draftAlert.value.thresholds) {
draftAlert.value.thresholds = {
sla_percent_min: 99.5,
latency_p99_ms_max: 2000,
ttft_p99_ms_max: 500,
request_error_rate_percent_max: 5,
upstream_error_rate_percent_max: 5
@@ -353,18 +347,7 @@ onMounted(() => {
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">SLA 低于此值时将显示为红色</p>
</div>
<div>
<div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">延迟 P99 最大值 (ms)</div>
<input
v-model.number="draftAlert.thresholds.latency_p99_ms_max"
type="number"
min="0"
step="100"
class="input"
placeholder="2000"
/>
<p class="mt-1 text-xs text-gray-500 dark:text-gray-400">延迟 P99 高于此值时将显示为红色</p>
</div>
<div>
<div class="mb-1 text-xs font-medium text-gray-600 dark:text-gray-300">TTFT P99 最大值 (ms)</div>

View File

@@ -32,7 +32,6 @@ const advancedSettings = ref<OpsAdvancedSettings | null>(null)
// 指标阈值配置
const metricThresholds = ref<OpsMetricThresholds>({
sla_percent_min: 99.5,
latency_p99_ms_max: 2000,
ttft_p99_ms_max: 500,
request_error_rate_percent_max: 5,
upstream_error_rate_percent_max: 5
@@ -53,13 +52,12 @@ async function loadAllSettings() {
advancedSettings.value = advanced
// 如果后端返回了阈值,使用后端的值;否则保持默认值
if (thresholds && Object.keys(thresholds).length > 0) {
metricThresholds.value = {
sla_percent_min: thresholds.sla_percent_min ?? 99.5,
latency_p99_ms_max: thresholds.latency_p99_ms_max ?? 2000,
ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500,
request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5,
upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5
}
metricThresholds.value = {
sla_percent_min: thresholds.sla_percent_min ?? 99.5,
ttft_p99_ms_max: thresholds.ttft_p99_ms_max ?? 500,
request_error_rate_percent_max: thresholds.request_error_rate_percent_max ?? 5,
upstream_error_rate_percent_max: thresholds.upstream_error_rate_percent_max ?? 5
}
}
} catch (err: any) {
console.error('[OpsSettingsDialog] Failed to load settings', err)
@@ -161,9 +159,6 @@ const validation = computed(() => {
if (metricThresholds.value.sla_percent_min != null && (metricThresholds.value.sla_percent_min < 0 || metricThresholds.value.sla_percent_min > 100)) {
errors.push('SLA最低百分比必须在0-100之间')
}
if (metricThresholds.value.latency_p99_ms_max != null && metricThresholds.value.latency_p99_ms_max < 0) {
errors.push('延迟P99最大值必须大于等于0')
}
if (metricThresholds.value.ttft_p99_ms_max != null && metricThresholds.value.ttft_p99_ms_max < 0) {
errors.push('TTFT P99最大值必须大于等于0')
}
@@ -362,17 +357,6 @@ async function saveAllSettings() {
<p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.slaMinPercentHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.ops.settings.latencyP99MaxMs') }}</label>
<input
v-model.number="metricThresholds.latency_p99_ms_max"
type="number"
min="0"
step="100"
class="input"
/>
<p class="mt-1 text-xs text-gray-500">{{ t('admin.ops.settings.latencyP99MaxMsHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.ops.settings.ttftP99MaxMs') }}</label>