feat(运维监控): 增强监控功能和健康评分系统

后端改进:
- 新增健康评分计算服务(ops_health_score.go)
- 添加分布式锁支持(ops_advisory_lock.go)
- 优化指标采集和聚合逻辑
- 新增运维指标采集间隔配置(60-3600秒)
- 移除未使用的WebSocket查询token认证中间件
- 改进清理服务和告警评估逻辑

前端改进:
- 简化OpsDashboard组件结构
- 完善国际化文本(中英文)
- 新增运维监控相关API类型定义
- 添加运维指标采集间隔设置界面
- 优化错误详情模态框

测试:
- 添加健康评分单元测试
- 更新API契约测试
This commit is contained in:
IanShaw027
2026-01-10 01:38:47 +08:00
parent 8ae75e7f6e
commit 585257d340
25 changed files with 570 additions and 385 deletions

View File

@@ -46,6 +46,8 @@ export interface OpsDashboardOverview {
platform: string
group_id?: number | null
health_score?: number
system_metrics?: OpsSystemMetricsSnapshot | null
job_heartbeats?: OpsJobHeartbeat[] | null
@@ -228,6 +230,9 @@ export interface OpsSystemMetricsSnapshot {
db_ok?: boolean | null
redis_ok?: boolean | null
redis_conn_total?: number | null
redis_conn_idle?: number | null
db_conn_active?: number | null
db_conn_idle?: number | null
db_conn_waiting?: number | null

View File

@@ -50,6 +50,7 @@ export interface SystemSettings {
ops_monitoring_enabled: boolean
ops_realtime_monitoring_enabled: boolean
ops_query_mode_default: 'auto' | 'raw' | 'preagg' | string
ops_metrics_interval_seconds: number
}
export interface UpdateSettingsRequest {
@@ -83,6 +84,7 @@ export interface UpdateSettingsRequest {
ops_monitoring_enabled?: boolean
ops_realtime_monitoring_enabled?: boolean
ops_query_mode_default?: 'auto' | 'raw' | 'preagg' | string
ops_metrics_interval_seconds?: number
}
/**

View File

@@ -1733,8 +1733,10 @@ export default {
redis: 'Redis',
goroutines: 'Goroutines',
jobs: 'Jobs',
jobsHelp: 'Click “Details” to view job heartbeats and recent errors',
active: 'active',
idle: 'idle',
waiting: 'waiting',
ok: 'ok',
lastRun: 'last_run:',
lastSuccess: 'last_success:',
@@ -1770,12 +1772,50 @@ export default {
errorsSla: 'Errors (SLA scope)',
upstreamExcl429529: 'Upstream (excl 429/529)',
failedToLoadData: 'Failed to load ops data.',
failedToLoadOverview: 'Failed to load overview',
failedToLoadThroughputTrend: 'Failed to load throughput trend',
failedToLoadLatencyHistogram: 'Failed to load latency histogram',
failedToLoadErrorTrend: 'Failed to load error trend',
failedToLoadErrorDistribution: 'Failed to load error distribution',
failedToLoadErrorDetail: 'Failed to load error detail',
retryFailed: 'Retry failed',
tpsK: 'TPS (K)',
top: 'Top:',
throughputTrend: 'Throughput Trend',
latencyHistogram: 'Latency Histogram',
errorTrend: 'Error Trend',
errorDistribution: 'Error Distribution',
// Health Score & Diagnosis
health: 'Health',
healthCondition: 'Health Condition',
healthHelp: 'Overall system health score based on SLA, error rate, and resource usage',
healthyStatus: 'Healthy',
riskyStatus: 'At Risk',
idleStatus: 'Idle',
diagnosis: {
title: 'Smart Diagnosis',
footer: 'Automated diagnostic suggestions based on current metrics',
idle: 'System is currently idle',
idleImpact: 'No active traffic',
upstreamCritical: 'Upstream error rate critically high ({rate}%)',
upstreamCriticalImpact: 'May affect many user requests',
upstreamHigh: 'Upstream error rate elevated ({rate}%)',
upstreamHighImpact: 'Recommend checking upstream service status',
slaCritical: 'SLA critically below target ({sla}%)',
slaCriticalImpact: 'User experience severely degraded',
slaLow: 'SLA below target ({sla}%)',
slaLowImpact: 'Service quality needs attention',
errorHigh: 'Error rate too high ({rate}%)',
errorHighImpact: 'Many requests failing',
errorElevated: 'Error rate elevated ({rate}%)',
errorElevatedImpact: 'Recommend checking error logs',
healthCritical: 'Overall health score critically low ({score})',
healthCriticalImpact: 'Multiple metrics may be degraded; prioritize error rate and latency investigation',
healthLow: 'Overall health score low ({score})',
healthLowImpact: 'May indicate minor instability; monitor SLA and error rates',
healthy: 'All system metrics normal',
healthyImpact: 'Service running stable'
},
// Error Log
errorLog: {
timeId: 'Time / ID',
@@ -2069,7 +2109,21 @@ export default {
throughputTrend: 'Requests/QPS + Tokens/TPS in the selected window.',
latencyHistogram: 'Latency distribution (duration_ms) for successful requests.',
errorTrend: 'Error counts over time (SLA scope excludes business limits; upstream excludes 429/529).',
errorDistribution: 'Error distribution by status code.'
errorDistribution: 'Error distribution by status code.',
goroutines:
'Number of Go runtime goroutines (lightweight threads). There is no absolute “safe” number—use your historical baseline. Heuristic: <2k is common; 2k–8k watch; >8k plus rising queue/latency often suggests blocking/leaks.',
cpu: 'CPU usage percentage, showing system processor load.',
memory: 'Memory usage, including used and total available memory.',
db: 'Database connection pool status, including active, idle, and waiting connections.',
redis: 'Redis connection pool status, showing active and idle connections.',
jobs: 'Background job execution status, including last run time, success time, and error information.',
qps: 'Queries Per Second (QPS) and Tokens Per Second (TPS), real-time system throughput.',
tokens: 'Total number of tokens processed in the current time window.',
sla: 'Service Level Agreement success rate, excluding business limits (e.g., insufficient balance, quota exceeded).',
errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
latency: 'Request latency statistics, including p50, p90, p95, p99 percentiles.',
ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
},
charts: {
emptyRequest: 'No requests in this window.',
@@ -2183,7 +2237,9 @@ export default {
queryModeHint: 'Default query mode for Ops Dashboard (auto/raw/preagg)',
queryModeAuto: 'Auto (recommended)',
queryModeRaw: 'Raw (most accurate, slower)',
queryModePreagg: 'Preagg (fastest, requires aggregation)'
queryModePreagg: 'Preagg (fastest, requires aggregation)',
metricsInterval: 'Metrics Collection Interval (seconds)',
metricsIntervalHint: 'How often to collect system/request metrics (60-3600 seconds)'
},
adminApiKey: {
title: 'Admin API Key',

View File

@@ -1878,8 +1878,10 @@ export default {
redis: 'Redis',
goroutines: '协程',
jobs: '后台任务',
jobsHelp: '点击“明细”查看任务心跳与报错信息',
active: '活跃',
idle: '空闲',
waiting: '等待',
ok: '正常',
lastRun: '最近运行',
lastSuccess: '最近成功',
@@ -1898,8 +1900,8 @@ export default {
errors: '错误',
errorRate: '错误率:',
upstreamRate: '上游错误率:',
latencyDuration: '延迟 (duration_ms)',
ttftLabel: 'TTFT (first_token_ms)',
latencyDuration: '延迟(毫秒)',
ttftLabel: '首字延迟(毫秒)',
p50: 'p50',
p90: 'p90',
p95: 'p95',
@@ -1915,12 +1917,50 @@ export default {
errorsSla: '错误(SLA范围)',
upstreamExcl429529: '上游(排除429/529)',
failedToLoadData: '加载运维数据失败',
tpsK: 'TPS (K)',
failedToLoadOverview: '加载概览数据失败',
failedToLoadThroughputTrend: '加载吞吐趋势失败',
failedToLoadLatencyHistogram: '加载延迟分布失败',
failedToLoadErrorTrend: '加载错误趋势失败',
failedToLoadErrorDistribution: '加载错误分布失败',
failedToLoadErrorDetail: '加载错误详情失败',
retryFailed: '重试失败',
tpsK: 'TPS',
top: '最高:',
throughputTrend: '吞吐趋势',
latencyHistogram: '延迟分布',
errorTrend: '错误趋势',
errorDistribution: '错误分布',
// Health Score & Diagnosis
health: '健康',
healthCondition: '健康状况',
healthHelp: '基于 SLA、错误率和资源使用情况的系统整体健康评分',
healthyStatus: '健康',
riskyStatus: '风险',
idleStatus: '待机',
diagnosis: {
title: '智能诊断',
footer: '基于当前指标的自动诊断建议',
idle: '系统当前处于待机状态',
idleImpact: '无活跃流量',
upstreamCritical: '上游错误率严重偏高 ({rate}%)',
upstreamCriticalImpact: '可能影响大量用户请求',
upstreamHigh: '上游错误率偏高 ({rate}%)',
upstreamHighImpact: '建议检查上游服务状态',
slaCritical: 'SLA 严重低于目标 ({sla}%)',
slaCriticalImpact: '用户体验严重受损',
slaLow: 'SLA 低于目标 ({sla}%)',
slaLowImpact: '需要关注服务质量',
errorHigh: '错误率过高 ({rate}%)',
errorHighImpact: '大量请求失败',
errorElevated: '错误率偏高 ({rate}%)',
errorElevatedImpact: '建议检查错误日志',
healthCritical: '综合健康评分过低 ({score})',
healthCriticalImpact: '多个指标可能同时异常,建议优先排查错误与延迟',
healthLow: '综合健康评分偏低 ({score})',
healthLowImpact: '可能存在轻度波动,建议关注 SLA 与错误率',
healthy: '所有系统指标正常',
healthyImpact: '服务运行稳定'
},
// Error Log
errorLog: {
timeId: '时间 / ID',
@@ -2212,9 +2252,23 @@ export default {
},
tooltips: {
throughputTrend: '当前窗口内的请求/QPS 与 token/TPS 趋势。',
latencyHistogram: '成功请求的延迟分布(duration_ms)。',
latencyHistogram: '成功请求的延迟分布(毫秒)。',
errorTrend: '错误趋势(SLA 口径排除业务限制;上游错误率排除 429/529)。',
errorDistribution: '按状态码统计的错误分布。'
errorDistribution: '按状态码统计的错误分布。',
goroutines:
'Go 运行时的协程数量(轻量级线程)。没有绝对“安全值”,建议以历史基线为准。经验参考:<2000 常见;2000-8000 需关注;>8000 且伴随队列/延迟上升时,优先排查阻塞/泄漏。',
cpu: 'CPU 使用率,显示系统处理器的负载情况。',
memory: '内存使用率,包括已使用和总可用内存。',
db: '数据库连接池状态,包括活跃连接、空闲连接和等待连接数。',
redis: 'Redis 连接池状态,显示活跃和空闲的连接数。',
jobs: '后台任务执行状态,包括最近运行时间、成功时间和错误信息。',
qps: '每秒查询数(QPS)和每秒Token数(TPS),实时显示系统吞吐量。',
tokens: '当前时间窗口内处理的总Token数量。',
sla: '服务等级协议达成率,排除业务限制(如余额不足、配额超限)的成功请求占比。',
errors: '错误统计,包括总错误数、错误率和上游错误率。',
latency: '请求延迟统计,包括 p50、p90、p95、p99 等百分位数。',
ttft: '首Token延迟(Time To First Token),衡量流式响应的首字节返回速度。',
health: '系统健康评分(0-100),综合考虑 SLA、错误率和资源使用情况。'
},
charts: {
emptyRequest: '该时间窗口内暂无请求。',
@@ -2320,14 +2374,16 @@ export default {
description: '启用运维监控模块,用于排障与健康可视化',
disabled: '运维监控已关闭',
enabled: '启用运维监控',
enabledHint: '启用 Ops 运维监控模块(仅管理员可见)',
enabledHint: '启用运维监控模块(仅管理员可见)',
realtimeEnabled: '启用实时监控',
realtimeEnabledHint: '启用实时 QPS/指标推送(WebSocket)',
realtimeEnabledHint: '启用实时请求速率和指标推送(WebSocket)',
queryMode: '默认查询模式',
queryModeHint: 'Ops Dashboard 默认查询模式(auto/raw/preagg)',
queryModeHint: '运维监控默认查询模式(自动/原始/预聚合)',
queryModeAuto: '自动(推荐)',
queryModeRaw: 'Raw(最准,但较慢)',
queryModePreagg: 'Preagg(最快,需预聚合)'
queryModeRaw: '原始(最准,但较慢)',
queryModePreagg: '预聚合(最快,需预聚合)',
metricsInterval: '采集频率(秒)',
metricsIntervalHint: '系统/请求指标采集频率(60-3600 秒)'
},
adminApiKey: {
title: '管理员 API Key',

View File

@@ -715,6 +715,25 @@
class="w-[220px]"
/>
</div>
<div v-if="form.ops_monitoring_enabled" class="mt-5 flex items-center justify-between">
<div>
<label class="font-medium text-gray-900 dark:text-white">{{
t('admin.settings.opsMonitoring.metricsInterval')
}}</label>
<p class="text-sm text-gray-500 dark:text-gray-400">
{{ t('admin.settings.opsMonitoring.metricsIntervalHint') }}
</p>
</div>
<input
v-model.number="form.ops_metrics_interval_seconds"
type="number"
min="60"
max="3600"
step="10"
class="w-[220px] rounded-lg border border-gray-300 bg-white px-3 py-2 text-sm text-gray-900 shadow-sm focus:border-primary-500 focus:outline-none focus:ring-1 focus:ring-primary-500 dark:border-dark-600 dark:bg-dark-800 dark:text-white"
/>
</div>
</div>
</div>
@@ -824,7 +843,8 @@ const form = reactive<SettingsForm>({
// Ops Monitoring (vNext)
ops_monitoring_enabled: true,
ops_realtime_monitoring_enabled: true,
ops_query_mode_default: 'auto'
ops_query_mode_default: 'auto',
ops_metrics_interval_seconds: 60
})
const opsQueryModeOptions = computed(() => [
@@ -922,7 +942,8 @@ async function saveSettings() {
identity_patch_prompt: form.identity_patch_prompt,
ops_monitoring_enabled: form.ops_monitoring_enabled,
ops_realtime_monitoring_enabled: form.ops_realtime_monitoring_enabled,
ops_query_mode_default: form.ops_query_mode_default
ops_query_mode_default: form.ops_query_mode_default,
ops_metrics_interval_seconds: form.ops_metrics_interval_seconds
}
const updated = await adminAPI.settings.updateSettings(payload)
Object.assign(form, updated)

View File

@@ -33,190 +33,6 @@
@open-error-details="openErrorDetails"
/>
<!-- Overview -->
<div
v-if="opsEnabled && !(loading && !hasLoadedOnce)"
class="overflow-hidden rounded-3xl bg-white shadow-sm ring-1 ring-gray-900/5 dark:bg-dark-800 dark:ring-dark-700"
>
<div class="border-b border-gray-100 px-6 py-4 dark:border-dark-700">
<h3 class="text-base font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.systemHealth') }}</h3>
</div>
<div class="p-6">
<div v-if="loadingOverview" class="flex items-center justify-center py-10">
<div class="h-8 w-8 animate-spin rounded-full border-b-2 border-primary-600"></div>
</div>
<div v-else-if="!overview?.system_metrics" class="py-6 text-sm text-gray-500 dark:text-gray-400">
{{ t('admin.ops.noSystemMetrics') }}
</div>
<div v-else class="space-y-6">
<div class="text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.collectedAt') }} {{ formatDateTime(overview.system_metrics.created_at) }} ({{ t('admin.ops.window') }}
{{ overview.system_metrics.window_minutes }}m)
</div>
<div class="grid grid-cols-1 gap-4 md:grid-cols-5">
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.cpu') }}</div>
<div class="mt-1 text-xl font-semibold text-gray-900 dark:text-white">
{{ formatPercent0to100(overview.system_metrics.cpu_usage_percent) }}
</div>
</div>
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.memory') }}</div>
<div class="mt-1 text-xl font-semibold text-gray-900 dark:text-white">
{{ formatPercent0to100(overview.system_metrics.memory_usage_percent) }}
</div>
<div class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ formatMBPair(overview.system_metrics.memory_used_mb, overview.system_metrics.memory_total_mb) }}
</div>
</div>
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.db') }}</div>
<div class="mt-1 text-xl font-semibold" :class="boolOkClass(overview.system_metrics.db_ok)">
{{ boolOkLabel(overview.system_metrics.db_ok) }}
</div>
<div class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.active') }}: {{ overview.system_metrics.db_conn_active ?? '-' }}, {{ t('admin.ops.idle') }}:
{{ overview.system_metrics.db_conn_idle ?? '-' }}
</div>
</div>
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.redis') }}</div>
<div class="mt-1 text-xl font-semibold" :class="boolOkClass(overview.system_metrics.redis_ok)">
{{ boolOkLabel(overview.system_metrics.redis_ok) }}
</div>
</div>
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.goroutines') }}</div>
<div class="mt-1 text-xl font-semibold text-gray-900 dark:text-white">
{{ overview.system_metrics.goroutine_count ?? '-' }}
</div>
</div>
</div>
<div v-if="overview?.job_heartbeats?.length" class="rounded-xl border border-gray-100 dark:border-dark-700">
<div class="border-b border-gray-100 px-4 py-3 text-sm font-semibold text-gray-900 dark:border-dark-700 dark:text-white">
{{ t('admin.ops.jobs') }}
</div>
<div class="divide-y divide-gray-100 dark:divide-dark-700">
<div
v-for="job in overview.job_heartbeats"
:key="job.job_name"
class="flex flex-col gap-1 px-4 py-3 md:flex-row md:items-center md:justify-between"
>
<div class="text-sm font-medium text-gray-900 dark:text-white">
{{ job.job_name }}
</div>
<div class="text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.lastRun') }}: {{ job.last_run_at ? formatDateTime(job.last_run_at) : '-' }} · {{ t('admin.ops.lastSuccess') }}:
{{ job.last_success_at ? formatDateTime(job.last_success_at) : '-' }} ·
<span v-if="job.last_error" class="text-rose-600 dark:text-rose-400">
{{ t('admin.ops.lastError') }}: {{ job.last_error }}
</span>
<span v-else class="text-emerald-600 dark:text-emerald-400">{{ t('admin.ops.ok') }}</span>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<div v-if="opsEnabled && !(loading && !hasLoadedOnce)" class="card">
<div class="border-b border-gray-100 px-6 py-4 dark:border-dark-700">
<h3 class="text-base font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.overview') }}</h3>
</div>
<div class="p-6">
<div v-if="loadingOverview" class="flex items-center justify-center py-10">
<div class="h-8 w-8 animate-spin rounded-full border-b-2 border-primary-600"></div>
</div>
<div v-else-if="!overview" class="py-6 text-sm text-gray-500 dark:text-gray-400">
{{ t('admin.ops.noData') }}
</div>
<div v-else class="space-y-6">
<div class="grid grid-cols-1 gap-4 md:grid-cols-4">
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.requestsTotal') }}</div>
<div class="mt-1 text-xl font-semibold text-gray-900 dark:text-white">
{{ formatInt(overview.request_count_total) }}
</div>
<div class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.slaScope') }} {{ formatInt(overview.request_count_sla) }}
</div>
</div>
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.tokens') }}</div>
<div class="mt-1 text-xl font-semibold text-gray-900 dark:text-white">
{{ formatInt(overview.token_consumed) }}
</div>
<div class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.tps') }} {{ overview.tps.current }} ({{ t('admin.ops.peak') }} {{ overview.tps.peak }})
</div>
</div>
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.sla') }}</div>
<div class="mt-1 text-xl font-semibold text-gray-900 dark:text-white">
{{ formatPercent(overview.sla) }}
</div>
<div class="mt-1 text-xs text-gray-500 dark:text-gray-400">
{{ t('admin.ops.businessLimited') }}: {{ formatInt(overview.business_limited_count) }}
</div>
</div>
<div class="rounded-xl bg-gray-50 p-4 dark:bg-dark-800/50">
<div class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.ops.errors') }}</div>
<div class="mt-1 text-xs text-gray-600 dark:text-gray-300">
{{ t('admin.ops.errorRate') }}: <span class="font-semibold">{{ formatPercent(overview.error_rate) }}</span>
</div>
<div class="mt-1 text-xs text-gray-600 dark:text-gray-300">
{{ t('admin.ops.upstreamRate') }}: <span class="font-semibold">{{ formatPercent(overview.upstream_error_rate) }}</span>
</div>
<div class="mt-1 text-xs text-gray-500 dark:text-gray-400">
429: {{ formatInt(overview.upstream_429_count) }} · 529:
{{ formatInt(overview.upstream_529_count) }}
</div>
</div>
</div>
<div class="grid grid-cols-1 gap-4 md:grid-cols-2">
<div class="rounded-xl border border-gray-200 bg-white p-4 dark:border-dark-700 dark:bg-dark-900">
<div class="text-sm font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.latencyDuration') }}</div>
<div class="mt-3 grid grid-cols-2 gap-2 text-xs text-gray-600 dark:text-gray-300 md:grid-cols-3">
<div>{{ t('admin.ops.p50') }}: <span class="font-mono">{{ formatMs(overview.duration.p50_ms) }}</span></div>
<div>{{ t('admin.ops.p90') }}: <span class="font-mono">{{ formatMs(overview.duration.p90_ms) }}</span></div>
<div>{{ t('admin.ops.p95') }}: <span class="font-mono">{{ formatMs(overview.duration.p95_ms) }}</span></div>
<div>{{ t('admin.ops.p99') }}: <span class="font-mono">{{ formatMs(overview.duration.p99_ms) }}</span></div>
<div>{{ t('admin.ops.avg') }}: <span class="font-mono">{{ formatMs(overview.duration.avg_ms) }}</span></div>
<div>{{ t('admin.ops.max') }}: <span class="font-mono">{{ formatMs(overview.duration.max_ms) }}</span></div>
</div>
</div>
<div class="rounded-xl border border-gray-200 bg-white p-4 dark:border-dark-700 dark:bg-dark-900">
<div class="text-sm font-semibold text-gray-900 dark:text-white">{{ t('admin.ops.ttftLabel') }}</div>
<div class="mt-3 grid grid-cols-2 gap-2 text-xs text-gray-600 dark:text-gray-300 md:grid-cols-3">
<div>{{ t('admin.ops.p50') }}: <span class="font-mono">{{ formatMs(overview.ttft.p50_ms) }}</span></div>
<div>{{ t('admin.ops.p90') }}: <span class="font-mono">{{ formatMs(overview.ttft.p90_ms) }}</span></div>
<div>{{ t('admin.ops.p95') }}: <span class="font-mono">{{ formatMs(overview.ttft.p95_ms) }}</span></div>
<div>{{ t('admin.ops.p99') }}: <span class="font-mono">{{ formatMs(overview.ttft.p99_ms) }}</span></div>
<div>{{ t('admin.ops.avg') }}: <span class="font-mono">{{ formatMs(overview.ttft.avg_ms) }}</span></div>
<div>{{ t('admin.ops.max') }}: <span class="font-mono">{{ formatMs(overview.ttft.max_ms) }}</span></div>
</div>
</div>
</div>
</div>
</div>
</div>
<!-- Row: Concurrency + Throughput -->
<div v-if="opsEnabled && !(loading && !hasLoadedOnce)" class="grid grid-cols-1 gap-6 lg:grid-cols-3">
<div class="lg:col-span-1 min-h-[360px]">
@@ -308,7 +124,6 @@ import OpsLatencyChart from './components/OpsLatencyChart.vue'
import OpsThroughputTrendChart from './components/OpsThroughputTrendChart.vue'
import OpsAlertEventsCard from './components/OpsAlertEventsCard.vue'
import OpsRequestDetailsModal, { type OpsRequestDetailsPreset } from './components/OpsRequestDetailsModal.vue'
import { formatDateTime, formatNumberLocaleString } from '@/utils/format'
const route = useRoute()
const router = useRouter()
@@ -486,7 +301,6 @@ const syncQueryToRoute = useDebounceFn(async () => {
}, 250)
const overview = ref<OpsDashboardOverview | null>(null)
const loadingOverview = ref(false)
const throughputTrend = ref<OpsThroughputTrendResponse | null>(null)
const loadingTrend = ref(false)
@@ -523,12 +337,15 @@ function handleThroughputSelectGroup(nextGroupId: number) {
groupId.value = id
}
function handleOpenRequestDetails() {
requestDetailsPreset.value = {
function handleOpenRequestDetails(preset?: OpsRequestDetailsPreset) {
const basePreset: OpsRequestDetailsPreset = {
title: t('admin.ops.requestDetails.title'),
kind: 'all',
sort: 'created_at_desc'
}
requestDetailsPreset.value = { ...basePreset, ...(preset ?? {}) }
if (!requestDetailsPreset.value.title) requestDetailsPreset.value.title = basePreset.title
showRequestDetails.value = true
}
@@ -573,46 +390,8 @@ function openError(id: number) {
showErrorModal.value = true
}
/**
 * Renders a count for display.
 *
 * @param v - The value to render; may be missing.
 * @returns The result of `formatNumberLocaleString(v)` when `v` is a number,
 *          otherwise the literal string `'0'`.
 */
function formatInt(v: number | null | undefined): string {
  return typeof v === 'number' ? formatNumberLocaleString(v) : '0'
}
/**
 * Renders a fractional ratio as a percentage with two decimals.
 *
 * The input is multiplied by 100 before formatting, so `0.5` becomes `'50.00%'`.
 *
 * @param v - The ratio to render; may be missing.
 * @returns The formatted percentage, or `'-'` when `v` is not a number.
 */
function formatPercent(v: number | null | undefined): string {
  if (typeof v === 'number') {
    const scaled = v * 100
    return scaled.toFixed(2) + '%'
  }
  return '-'
}
/**
 * Renders a value that is already expressed as a percentage, using one decimal.
 *
 * No scaling is applied: `12.5` becomes `'12.5%'`.
 *
 * @param v - The percentage to render; may be missing.
 * @returns The formatted percentage, or `'-'` when `v` is not a number.
 */
function formatPercent0to100(v: number | null | undefined): string {
  return typeof v === 'number' ? v.toFixed(1) + '%' : '-'
}
/**
 * Renders a "used / total MB" pair.
 *
 * @param used - The used amount; may be missing.
 * @param total - The total amount; may be missing.
 * @returns Both values passed through `formatNumberLocaleString`, joined as
 *          `"<used> / <total> MB"`, or `'-'` when either value is not a number.
 */
function formatMBPair(used: number | null | undefined, total: number | null | undefined): string {
  if (typeof used === 'number' && typeof total === 'number') {
    const usedText = formatNumberLocaleString(used)
    const totalText = formatNumberLocaleString(total)
    return usedText + ' / ' + totalText + ' MB'
  }
  return '-'
}
/**
 * Maps a tri-state health flag to its display label.
 *
 * @param v - `true`, `false`, or missing.
 * @returns `'OK'` for `true`, `'FAIL'` for `false`, and `'-'` otherwise.
 */
function boolOkLabel(v: boolean | null | undefined): string {
  switch (v) {
    case true:
      return 'OK'
    case false:
      return 'FAIL'
    default:
      return '-'
  }
}
/**
 * Maps a tri-state health flag to the CSS class string used to colour it.
 *
 * @param v - `true`, `false`, or missing.
 * @returns The emerald classes for `true`, the rose classes for `false`,
 *          and the neutral gray/white classes otherwise.
 */
function boolOkClass(v: boolean | null | undefined): string {
  switch (v) {
    case true:
      return 'text-emerald-600 dark:text-emerald-400'
    case false:
      return 'text-rose-600 dark:text-rose-400'
    default:
      return 'text-gray-900 dark:text-white'
  }
}
/**
 * Renders a millisecond value with an `ms` suffix.
 *
 * @param v - The duration to render; may be missing.
 * @returns `"<v>ms"`, or `'-'` when `v` is `null` or `undefined`.
 */
function formatMs(v: number | null | undefined): string {
  const missing = v === null || v === undefined
  return missing ? '-' : String(v) + 'ms'
}
async function refreshOverviewWithCancel(fetchSeq: number, signal: AbortSignal) {
if (!opsEnabled.value) return
loadingOverview.value = true
try {
const data = await opsAPI.getDashboardOverview(
{
@@ -628,11 +407,7 @@ async function refreshOverviewWithCancel(fetchSeq: number, signal: AbortSignal)
} catch (err: any) {
if (fetchSeq !== dashboardFetchSeq || isCanceledRequest(err)) return
overview.value = null
appStore.showError(err?.message || 'Failed to load overview')
} finally {
if (fetchSeq === dashboardFetchSeq) {
loadingOverview.value = false
}
appStore.showError(err?.message || t('admin.ops.failedToLoadOverview'))
}
}
@@ -654,7 +429,7 @@ async function refreshThroughputTrendWithCancel(fetchSeq: number, signal: AbortS
} catch (err: any) {
if (fetchSeq !== dashboardFetchSeq || isCanceledRequest(err)) return
throughputTrend.value = null
appStore.showError(err?.message || 'Failed to load throughput trend')
appStore.showError(err?.message || t('admin.ops.failedToLoadThroughputTrend'))
} finally {
if (fetchSeq === dashboardFetchSeq) {
loadingTrend.value = false
@@ -680,7 +455,7 @@ async function refreshLatencyHistogramWithCancel(fetchSeq: number, signal: Abort
} catch (err: any) {
if (fetchSeq !== dashboardFetchSeq || isCanceledRequest(err)) return
latencyHistogram.value = null
appStore.showError(err?.message || 'Failed to load latency histogram')
appStore.showError(err?.message || t('admin.ops.failedToLoadLatencyHistogram'))
} finally {
if (fetchSeq === dashboardFetchSeq) {
loadingLatency.value = false
@@ -706,7 +481,7 @@ async function refreshErrorTrendWithCancel(fetchSeq: number, signal: AbortSignal
} catch (err: any) {
if (fetchSeq !== dashboardFetchSeq || isCanceledRequest(err)) return
errorTrend.value = null
appStore.showError(err?.message || 'Failed to load error trend')
appStore.showError(err?.message || t('admin.ops.failedToLoadErrorTrend'))
} finally {
if (fetchSeq === dashboardFetchSeq) {
loadingErrorTrend.value = false
@@ -732,7 +507,7 @@ async function refreshErrorDistributionWithCancel(fetchSeq: number, signal: Abor
} catch (err: any) {
if (fetchSeq !== dashboardFetchSeq || isCanceledRequest(err)) return
errorDistribution.value = null
appStore.showError(err?.message || 'Failed to load error distribution')
appStore.showError(err?.message || t('admin.ops.failedToLoadErrorDistribution'))
} finally {
if (fetchSeq === dashboardFetchSeq) {
loadingErrorDistribution.value = false

View File

@@ -286,7 +286,7 @@ async function fetchDetail(id: number) {
}
} catch (err: any) {
detail.value = null
appStore.showError(err?.message || 'Failed to load error detail')
appStore.showError(err?.message || t('admin.ops.failedToLoadErrorDetail'))
} finally {
loading.value = false
}
@@ -348,7 +348,7 @@ async function runConfirmedRetry() {
const summary = res.status === 'succeeded' ? t('admin.ops.errorDetail.retrySuccess') : t('admin.ops.errorDetail.retryFailed')
appStore.showSuccess(summary)
} catch (err: any) {
appStore.showError(err?.message || 'Retry failed')
appStore.showError(err?.message || t('admin.ops.retryFailed'))
} finally {
retrying.value = false
}