feat(ops): 优化健康评分算法和智能诊断机制
- 采用分层加权评分(业务70% + 基础设施30%),避免重复扣分 - 新增延迟诊断(P99 > 2s critical, > 1s warning) - 新增资源诊断(CPU/内存/DB/Redis状态) - 调整诊断阈值(上游错误率5% critical,请求错误率3% critical) - 为每个诊断项添加可操作建议 - 添加完整的单元测试覆盖(30+测试用例) - 完善中英文国际化文本
This commit is contained in:
@@ -287,6 +287,7 @@ interface DiagnosisItem {
|
||||
type: 'critical' | 'warning' | 'info'
|
||||
message: string
|
||||
impact: string
|
||||
action?: string
|
||||
}
|
||||
|
||||
const diagnosisReport = computed<DiagnosisItem[]>(() => {
|
||||
@@ -304,63 +305,157 @@ const diagnosisReport = computed<DiagnosisItem[]>(() => {
|
||||
return report
|
||||
}
|
||||
|
||||
const upstreamRatePct = (ov.upstream_error_rate ?? 0) * 100
|
||||
if (upstreamRatePct > 10) {
|
||||
// Resource diagnostics (highest priority)
|
||||
const sm = ov.system_metrics
|
||||
if (sm) {
|
||||
if (sm.db_ok === false) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.dbDown'),
|
||||
impact: t('admin.ops.diagnosis.dbDownImpact'),
|
||||
action: t('admin.ops.diagnosis.dbDownAction')
|
||||
})
|
||||
}
|
||||
if (sm.redis_ok === false) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.redisDown'),
|
||||
impact: t('admin.ops.diagnosis.redisDownImpact'),
|
||||
action: t('admin.ops.diagnosis.redisDownAction')
|
||||
})
|
||||
}
|
||||
|
||||
const cpuPct = sm.cpu_usage_percent ?? 0
|
||||
if (cpuPct > 90) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.cpuCritical', { usage: cpuPct.toFixed(1) }),
|
||||
impact: t('admin.ops.diagnosis.cpuCriticalImpact'),
|
||||
action: t('admin.ops.diagnosis.cpuCriticalAction')
|
||||
})
|
||||
} else if (cpuPct > 80) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.cpuHigh', { usage: cpuPct.toFixed(1) }),
|
||||
impact: t('admin.ops.diagnosis.cpuHighImpact'),
|
||||
action: t('admin.ops.diagnosis.cpuHighAction')
|
||||
})
|
||||
}
|
||||
|
||||
const memPct = sm.memory_usage_percent ?? 0
|
||||
if (memPct > 90) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.memoryCritical', { usage: memPct.toFixed(1) }),
|
||||
impact: t('admin.ops.diagnosis.memoryCriticalImpact'),
|
||||
action: t('admin.ops.diagnosis.memoryCriticalAction')
|
||||
})
|
||||
} else if (memPct > 85) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.memoryHigh', { usage: memPct.toFixed(1) }),
|
||||
impact: t('admin.ops.diagnosis.memoryHighImpact'),
|
||||
action: t('admin.ops.diagnosis.memoryHighAction')
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Latency diagnostics
|
||||
const durationP99 = ov.duration?.p99_ms ?? 0
|
||||
if (durationP99 > 2000) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.upstreamCritical', { rate: upstreamRatePct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.upstreamCriticalImpact')
|
||||
message: t('admin.ops.diagnosis.latencyCritical', { latency: durationP99.toFixed(0) }),
|
||||
impact: t('admin.ops.diagnosis.latencyCriticalImpact'),
|
||||
action: t('admin.ops.diagnosis.latencyCriticalAction')
|
||||
})
|
||||
} else if (upstreamRatePct > 3) {
|
||||
} else if (durationP99 > 1000) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.upstreamHigh', { rate: upstreamRatePct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.upstreamHighImpact')
|
||||
message: t('admin.ops.diagnosis.latencyHigh', { latency: durationP99.toFixed(0) }),
|
||||
impact: t('admin.ops.diagnosis.latencyHighImpact'),
|
||||
action: t('admin.ops.diagnosis.latencyHighAction')
|
||||
})
|
||||
}
|
||||
|
||||
const ttftP99 = ov.ttft?.p99_ms ?? 0
|
||||
if (ttftP99 > 500) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.ttftHigh', { ttft: ttftP99.toFixed(0) }),
|
||||
impact: t('admin.ops.diagnosis.ttftHighImpact'),
|
||||
action: t('admin.ops.diagnosis.ttftHighAction')
|
||||
})
|
||||
}
|
||||
|
||||
// Error rate diagnostics (adjusted thresholds)
|
||||
const upstreamRatePct = (ov.upstream_error_rate ?? 0) * 100
|
||||
if (upstreamRatePct > 5) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.upstreamCritical', { rate: upstreamRatePct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.upstreamCriticalImpact'),
|
||||
action: t('admin.ops.diagnosis.upstreamCriticalAction')
|
||||
})
|
||||
} else if (upstreamRatePct > 2) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.upstreamHigh', { rate: upstreamRatePct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.upstreamHighImpact'),
|
||||
action: t('admin.ops.diagnosis.upstreamHighAction')
|
||||
})
|
||||
}
|
||||
|
||||
const errorPct = (ov.error_rate ?? 0) * 100
|
||||
if (errorPct > 3) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.errorHigh', { rate: errorPct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.errorHighImpact'),
|
||||
action: t('admin.ops.diagnosis.errorHighAction')
|
||||
})
|
||||
} else if (errorPct > 0.5) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.errorElevated', { rate: errorPct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.errorElevatedImpact'),
|
||||
action: t('admin.ops.diagnosis.errorElevatedAction')
|
||||
})
|
||||
}
|
||||
|
||||
// SLA diagnostics
|
||||
const slaPct = (ov.sla ?? 0) * 100
|
||||
if (slaPct < 90) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.slaCritical', { sla: slaPct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.slaCriticalImpact')
|
||||
impact: t('admin.ops.diagnosis.slaCriticalImpact'),
|
||||
action: t('admin.ops.diagnosis.slaCriticalAction')
|
||||
})
|
||||
} else if (slaPct < 98) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.slaLow', { sla: slaPct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.slaLowImpact')
|
||||
})
|
||||
}
|
||||
|
||||
const errorPct = (ov.error_rate ?? 0) * 100
|
||||
if (errorPct > 5) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.errorHigh', { rate: errorPct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.errorHighImpact')
|
||||
})
|
||||
} else if (errorPct > 1) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.errorElevated', { rate: errorPct.toFixed(2) }),
|
||||
impact: t('admin.ops.diagnosis.errorElevatedImpact')
|
||||
impact: t('admin.ops.diagnosis.slaLowImpact'),
|
||||
action: t('admin.ops.diagnosis.slaLowAction')
|
||||
})
|
||||
}
|
||||
|
||||
// Health score diagnostics (lowest priority)
|
||||
if (healthScoreValue.value != null) {
|
||||
if (healthScoreValue.value < 60) {
|
||||
report.push({
|
||||
type: 'critical',
|
||||
message: t('admin.ops.diagnosis.healthCritical', { score: healthScoreValue.value }),
|
||||
impact: t('admin.ops.diagnosis.healthCriticalImpact')
|
||||
impact: t('admin.ops.diagnosis.healthCriticalImpact'),
|
||||
action: t('admin.ops.diagnosis.healthCriticalAction')
|
||||
})
|
||||
} else if (healthScoreValue.value < 90) {
|
||||
report.push({
|
||||
type: 'warning',
|
||||
message: t('admin.ops.diagnosis.healthLow', { score: healthScoreValue.value }),
|
||||
impact: t('admin.ops.diagnosis.healthLowImpact')
|
||||
impact: t('admin.ops.diagnosis.healthLowImpact'),
|
||||
action: t('admin.ops.diagnosis.healthLowAction')
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -752,9 +847,12 @@ function openJobsDetails() {
|
||||
/>
|
||||
</svg>
|
||||
</div>
|
||||
<div>
|
||||
<div class="flex-1">
|
||||
<div class="text-xs font-semibold text-gray-900 dark:text-white">{{ item.message }}</div>
|
||||
<div class="mt-0.5 text-[11px] text-gray-500 dark:text-gray-400">{{ item.impact }}</div>
|
||||
<div v-if="item.action" class="mt-1 text-[11px] text-blue-600 dark:text-blue-400">
|
||||
💡 {{ item.action }}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user