Merge pull request #303 from IanShaw027/feature/ops-account-health-score
feat(ops): 运维监控功能增强与优化
This commit is contained in:
@@ -296,9 +296,10 @@ INSERT INTO ops_job_heartbeats (
|
||||
last_error_at,
|
||||
last_error,
|
||||
last_duration_ms,
|
||||
last_result,
|
||||
updated_at
|
||||
) VALUES (
|
||||
$1,$2,$3,$4,$5,$6,NOW()
|
||||
$1,$2,$3,$4,$5,$6,$7,NOW()
|
||||
)
|
||||
ON CONFLICT (job_name) DO UPDATE SET
|
||||
last_run_at = COALESCE(EXCLUDED.last_run_at, ops_job_heartbeats.last_run_at),
|
||||
@@ -312,6 +313,10 @@ ON CONFLICT (job_name) DO UPDATE SET
|
||||
ELSE COALESCE(EXCLUDED.last_error, ops_job_heartbeats.last_error)
|
||||
END,
|
||||
last_duration_ms = COALESCE(EXCLUDED.last_duration_ms, ops_job_heartbeats.last_duration_ms),
|
||||
last_result = CASE
|
||||
WHEN EXCLUDED.last_success_at IS NOT NULL THEN COALESCE(EXCLUDED.last_result, ops_job_heartbeats.last_result)
|
||||
ELSE ops_job_heartbeats.last_result
|
||||
END,
|
||||
updated_at = NOW()`
|
||||
|
||||
_, err := r.db.ExecContext(
|
||||
@@ -323,6 +328,7 @@ ON CONFLICT (job_name) DO UPDATE SET
|
||||
opsNullTime(input.LastErrorAt),
|
||||
opsNullString(input.LastError),
|
||||
opsNullInt(input.LastDurationMs),
|
||||
opsNullString(input.LastResult),
|
||||
)
|
||||
return err
|
||||
}
|
||||
@@ -340,6 +346,7 @@ SELECT
|
||||
last_error_at,
|
||||
last_error,
|
||||
last_duration_ms,
|
||||
last_result,
|
||||
updated_at
|
||||
FROM ops_job_heartbeats
|
||||
ORDER BY job_name ASC`
|
||||
@@ -359,6 +366,8 @@ ORDER BY job_name ASC`
|
||||
var lastError sql.NullString
|
||||
var lastDuration sql.NullInt64
|
||||
|
||||
var lastResult sql.NullString
|
||||
|
||||
if err := rows.Scan(
|
||||
&item.JobName,
|
||||
&lastRun,
|
||||
@@ -366,6 +375,7 @@ ORDER BY job_name ASC`
|
||||
&lastErrorAt,
|
||||
&lastError,
|
||||
&lastDuration,
|
||||
&lastResult,
|
||||
&item.UpdatedAt,
|
||||
); err != nil {
|
||||
return nil, err
|
||||
@@ -391,6 +401,10 @@ ORDER BY job_name ASC`
|
||||
v := lastDuration.Int64
|
||||
item.LastDurationMs = &v
|
||||
}
|
||||
if lastResult.Valid {
|
||||
v := lastResult.String
|
||||
item.LastResult = &v
|
||||
}
|
||||
|
||||
out = append(out, &item)
|
||||
}
|
||||
|
||||
@@ -4,6 +4,7 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -235,11 +236,13 @@ func (s *OpsAggregationService) aggregateHourly() {
|
||||
successAt := finishedAt
|
||||
hbCtx, hbCancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer hbCancel()
|
||||
result := truncateString(fmt.Sprintf("window=%s..%s", start.Format(time.RFC3339), end.Format(time.RFC3339)), 2048)
|
||||
_ = s.opsRepo.UpsertJobHeartbeat(hbCtx, &OpsUpsertJobHeartbeatInput{
|
||||
JobName: opsAggHourlyJobName,
|
||||
LastRunAt: &runAt,
|
||||
LastSuccessAt: &successAt,
|
||||
LastDurationMs: &dur,
|
||||
LastResult: &result,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -331,11 +334,13 @@ func (s *OpsAggregationService) aggregateDaily() {
|
||||
successAt := finishedAt
|
||||
hbCtx, hbCancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer hbCancel()
|
||||
result := truncateString(fmt.Sprintf("window=%s..%s", start.Format(time.RFC3339), end.Format(time.RFC3339)), 2048)
|
||||
_ = s.opsRepo.UpsertJobHeartbeat(hbCtx, &OpsUpsertJobHeartbeatInput{
|
||||
JobName: opsAggDailyJobName,
|
||||
LastRunAt: &runAt,
|
||||
LastSuccessAt: &successAt,
|
||||
LastDurationMs: &dur,
|
||||
LastResult: &result,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -190,6 +190,13 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
||||
return
|
||||
}
|
||||
|
||||
rulesTotal := len(rules)
|
||||
rulesEnabled := 0
|
||||
rulesEvaluated := 0
|
||||
eventsCreated := 0
|
||||
eventsResolved := 0
|
||||
emailsSent := 0
|
||||
|
||||
now := time.Now().UTC()
|
||||
safeEnd := now.Truncate(time.Minute)
|
||||
if safeEnd.IsZero() {
|
||||
@@ -205,6 +212,7 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
||||
if rule == nil || !rule.Enabled || rule.ID <= 0 {
|
||||
continue
|
||||
}
|
||||
rulesEnabled++
|
||||
|
||||
scopePlatform, scopeGroupID, scopeRegion := parseOpsAlertRuleScope(rule.Filters)
|
||||
|
||||
@@ -220,6 +228,7 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
||||
s.resetRuleState(rule.ID, now)
|
||||
continue
|
||||
}
|
||||
rulesEvaluated++
|
||||
|
||||
breachedNow := compareMetric(metricValue, rule.Operator, rule.Threshold)
|
||||
required := requiredSustainedBreaches(rule.SustainedMinutes, interval)
|
||||
@@ -278,8 +287,11 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
||||
continue
|
||||
}
|
||||
|
||||
eventsCreated++
|
||||
if created != nil && created.ID > 0 {
|
||||
s.maybeSendAlertEmail(ctx, runtimeCfg, rule, created)
|
||||
if s.maybeSendAlertEmail(ctx, runtimeCfg, rule, created) {
|
||||
emailsSent++
|
||||
}
|
||||
}
|
||||
continue
|
||||
}
|
||||
@@ -289,11 +301,14 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
||||
resolvedAt := now
|
||||
if err := s.opsRepo.UpdateAlertEventStatus(ctx, activeEvent.ID, OpsAlertStatusResolved, &resolvedAt); err != nil {
|
||||
log.Printf("[OpsAlertEvaluator] resolve event failed (event=%d): %v", activeEvent.ID, err)
|
||||
} else {
|
||||
eventsResolved++
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt))
|
||||
result := truncateString(fmt.Sprintf("rules=%d enabled=%d evaluated=%d created=%d resolved=%d emails_sent=%d", rulesTotal, rulesEnabled, rulesEvaluated, eventsCreated, eventsResolved, emailsSent), 2048)
|
||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt), result)
|
||||
}
|
||||
|
||||
func (s *OpsAlertEvaluatorService) pruneRuleStates(rules []*OpsAlertRule) {
|
||||
@@ -585,32 +600,32 @@ func buildOpsAlertDescription(rule *OpsAlertRule, value float64, windowMinutes i
|
||||
)
|
||||
}
|
||||
|
||||
func (s *OpsAlertEvaluatorService) maybeSendAlertEmail(ctx context.Context, runtimeCfg *OpsAlertRuntimeSettings, rule *OpsAlertRule, event *OpsAlertEvent) {
|
||||
func (s *OpsAlertEvaluatorService) maybeSendAlertEmail(ctx context.Context, runtimeCfg *OpsAlertRuntimeSettings, rule *OpsAlertRule, event *OpsAlertEvent) bool {
|
||||
if s == nil || s.emailService == nil || s.opsService == nil || event == nil || rule == nil {
|
||||
return
|
||||
return false
|
||||
}
|
||||
if event.EmailSent {
|
||||
return
|
||||
return false
|
||||
}
|
||||
if !rule.NotifyEmail {
|
||||
return
|
||||
return false
|
||||
}
|
||||
|
||||
emailCfg, err := s.opsService.GetEmailNotificationConfig(ctx)
|
||||
if err != nil || emailCfg == nil || !emailCfg.Alert.Enabled {
|
||||
return
|
||||
return false
|
||||
}
|
||||
|
||||
if len(emailCfg.Alert.Recipients) == 0 {
|
||||
return
|
||||
return false
|
||||
}
|
||||
if !shouldSendOpsAlertEmailByMinSeverity(strings.TrimSpace(emailCfg.Alert.MinSeverity), strings.TrimSpace(rule.Severity)) {
|
||||
return
|
||||
return false
|
||||
}
|
||||
|
||||
if runtimeCfg != nil && runtimeCfg.Silencing.Enabled {
|
||||
if isOpsAlertSilenced(time.Now().UTC(), rule, event, runtimeCfg.Silencing) {
|
||||
return
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -639,6 +654,7 @@ func (s *OpsAlertEvaluatorService) maybeSendAlertEmail(ctx context.Context, runt
|
||||
if anySent {
|
||||
_ = s.opsRepo.UpdateAlertEventEmailSent(context.Background(), event.ID, true)
|
||||
}
|
||||
return anySent
|
||||
}
|
||||
|
||||
func buildOpsAlertEmailBody(rule *OpsAlertRule, event *OpsAlertEvent) string {
|
||||
@@ -806,7 +822,7 @@ func (s *OpsAlertEvaluatorService) maybeLogSkip(key string) {
|
||||
log.Printf("[OpsAlertEvaluator] leader lock held by another instance; skipping (key=%q)", key)
|
||||
}
|
||||
|
||||
func (s *OpsAlertEvaluatorService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration) {
|
||||
func (s *OpsAlertEvaluatorService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration, result string) {
|
||||
if s == nil || s.opsRepo == nil {
|
||||
return
|
||||
}
|
||||
@@ -814,11 +830,17 @@ func (s *OpsAlertEvaluatorService) recordHeartbeatSuccess(runAt time.Time, durat
|
||||
durMs := duration.Milliseconds()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
msg := strings.TrimSpace(result)
|
||||
if msg == "" {
|
||||
msg = "ok"
|
||||
}
|
||||
msg = truncateString(msg, 2048)
|
||||
_ = s.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
||||
JobName: opsAlertEvaluatorJobName,
|
||||
LastRunAt: &runAt,
|
||||
LastSuccessAt: &now,
|
||||
LastDurationMs: &durMs,
|
||||
LastResult: &msg,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -149,7 +149,7 @@ func (s *OpsCleanupService) runScheduled() {
|
||||
log.Printf("[OpsCleanup] cleanup failed: %v", err)
|
||||
return
|
||||
}
|
||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt))
|
||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt), counts)
|
||||
log.Printf("[OpsCleanup] cleanup complete: %s", counts)
|
||||
}
|
||||
|
||||
@@ -330,12 +330,13 @@ func (s *OpsCleanupService) tryAcquireLeaderLock(ctx context.Context) (func(), b
|
||||
return release, true
|
||||
}
|
||||
|
||||
func (s *OpsCleanupService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration) {
|
||||
func (s *OpsCleanupService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration, counts opsCleanupDeletedCounts) {
|
||||
if s == nil || s.opsRepo == nil {
|
||||
return
|
||||
}
|
||||
now := time.Now().UTC()
|
||||
durMs := duration.Milliseconds()
|
||||
result := truncateString(counts.String(), 2048)
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
_ = s.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
||||
@@ -343,6 +344,7 @@ func (s *OpsCleanupService) recordHeartbeatSuccess(runAt time.Time, duration tim
|
||||
LastRunAt: &runAt,
|
||||
LastSuccessAt: &now,
|
||||
LastDurationMs: &durMs,
|
||||
LastResult: &result,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -235,6 +235,9 @@ type OpsUpsertJobHeartbeatInput struct {
|
||||
LastErrorAt *time.Time
|
||||
LastError *string
|
||||
LastDurationMs *int64
|
||||
|
||||
// LastResult is an optional human-readable summary of the last successful run.
|
||||
LastResult *string
|
||||
}
|
||||
|
||||
type OpsJobHeartbeat struct {
|
||||
@@ -245,6 +248,7 @@ type OpsJobHeartbeat struct {
|
||||
LastErrorAt *time.Time `json:"last_error_at"`
|
||||
LastError *string `json:"last_error"`
|
||||
LastDurationMs *int64 `json:"last_duration_ms"`
|
||||
LastResult *string `json:"last_result"`
|
||||
|
||||
UpdatedAt time.Time `json:"updated_at"`
|
||||
}
|
||||
|
||||
@@ -177,6 +177,10 @@ func (s *OpsScheduledReportService) runOnce() {
|
||||
return
|
||||
}
|
||||
|
||||
reportsTotal := len(reports)
|
||||
reportsDue := 0
|
||||
sentAttempts := 0
|
||||
|
||||
for _, report := range reports {
|
||||
if report == nil || !report.Enabled {
|
||||
continue
|
||||
@@ -184,14 +188,18 @@ func (s *OpsScheduledReportService) runOnce() {
|
||||
if report.NextRunAt.After(now) {
|
||||
continue
|
||||
}
|
||||
reportsDue++
|
||||
|
||||
if err := s.runReport(ctx, report, now); err != nil {
|
||||
attempts, err := s.runReport(ctx, report, now)
|
||||
if err != nil {
|
||||
s.recordHeartbeatError(runAt, time.Since(startedAt), err)
|
||||
return
|
||||
}
|
||||
sentAttempts += attempts
|
||||
}
|
||||
|
||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt))
|
||||
result := truncateString(fmt.Sprintf("reports=%d due=%d send_attempts=%d", reportsTotal, reportsDue, sentAttempts), 2048)
|
||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt), result)
|
||||
}
|
||||
|
||||
type opsScheduledReport struct {
|
||||
@@ -297,9 +305,9 @@ func (s *OpsScheduledReportService) listScheduledReports(ctx context.Context, no
|
||||
return out
|
||||
}
|
||||
|
||||
func (s *OpsScheduledReportService) runReport(ctx context.Context, report *opsScheduledReport, now time.Time) error {
|
||||
func (s *OpsScheduledReportService) runReport(ctx context.Context, report *opsScheduledReport, now time.Time) (int, error) {
|
||||
if s == nil || s.opsService == nil || s.emailService == nil || report == nil {
|
||||
return nil
|
||||
return 0, nil
|
||||
}
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
@@ -310,11 +318,11 @@ func (s *OpsScheduledReportService) runReport(ctx context.Context, report *opsSc
|
||||
|
||||
content, err := s.generateReportHTML(ctx, report, now)
|
||||
if err != nil {
|
||||
return err
|
||||
return 0, err
|
||||
}
|
||||
if strings.TrimSpace(content) == "" {
|
||||
// Skip sending when the report decides not to emit content (e.g., digest below min count).
|
||||
return nil
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
recipients := report.Recipients
|
||||
@@ -325,22 +333,24 @@ func (s *OpsScheduledReportService) runReport(ctx context.Context, report *opsSc
|
||||
}
|
||||
}
|
||||
if len(recipients) == 0 {
|
||||
return nil
|
||||
return 0, nil
|
||||
}
|
||||
|
||||
subject := fmt.Sprintf("[Ops Report] %s", strings.TrimSpace(report.Name))
|
||||
|
||||
attempts := 0
|
||||
for _, to := range recipients {
|
||||
addr := strings.TrimSpace(to)
|
||||
if addr == "" {
|
||||
continue
|
||||
}
|
||||
attempts++
|
||||
if err := s.emailService.SendEmail(ctx, addr, subject, content); err != nil {
|
||||
// Ignore per-recipient failures; continue best-effort.
|
||||
continue
|
||||
}
|
||||
}
|
||||
return nil
|
||||
return attempts, nil
|
||||
}
|
||||
|
||||
func (s *OpsScheduledReportService) generateReportHTML(ctx context.Context, report *opsScheduledReport, now time.Time) (string, error) {
|
||||
@@ -650,7 +660,7 @@ func (s *OpsScheduledReportService) setLastRunAt(ctx context.Context, reportType
|
||||
_ = s.redisClient.Set(ctx, key, strconv.FormatInt(t.UTC().Unix(), 10), 14*24*time.Hour).Err()
|
||||
}
|
||||
|
||||
func (s *OpsScheduledReportService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration) {
|
||||
func (s *OpsScheduledReportService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration, result string) {
|
||||
if s == nil || s.opsService == nil || s.opsService.opsRepo == nil {
|
||||
return
|
||||
}
|
||||
@@ -658,11 +668,17 @@ func (s *OpsScheduledReportService) recordHeartbeatSuccess(runAt time.Time, dura
|
||||
durMs := duration.Milliseconds()
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||
defer cancel()
|
||||
msg := strings.TrimSpace(result)
|
||||
if msg == "" {
|
||||
msg = "ok"
|
||||
}
|
||||
msg = truncateString(msg, 2048)
|
||||
_ = s.opsService.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
||||
JobName: opsScheduledReportJobName,
|
||||
LastRunAt: &runAt,
|
||||
LastSuccessAt: &now,
|
||||
LastDurationMs: &durMs,
|
||||
LastResult: &msg,
|
||||
})
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,6 @@
|
||||
-- Add last_result to ops_job_heartbeats for UI job details.
-- The ALTER below is guarded with IF EXISTS / IF NOT EXISTS, so re-running it
-- against a database that already has the column does not raise an error.
-- NOTE(review): COMMENT ON COLUMN has no IF EXISTS form; it would fail if the
-- table were missing. Presumably an earlier migration creates the table - confirm.
|
||||
|
||||
ALTER TABLE IF EXISTS ops_job_heartbeats
|
||||
ADD COLUMN IF NOT EXISTS last_result TEXT;
|
||||
|
||||
COMMENT ON COLUMN ops_job_heartbeats.last_result IS 'Last successful run result summary (human readable).';
|
||||
@@ -293,6 +293,7 @@ export interface OpsJobHeartbeat {
|
||||
last_error_at?: string | null
|
||||
last_error?: string | null
|
||||
last_duration_ms?: number | null
|
||||
last_result?: string | null
|
||||
updated_at: string
|
||||
}
|
||||
|
||||
|
||||
@@ -1925,7 +1925,7 @@ export default {
|
||||
errors: 'Errors',
|
||||
errorRate: 'error_rate:',
|
||||
upstreamRate: 'upstream_rate:',
|
||||
latencyDuration: 'Request Duration (ms)',
|
||||
latencyDuration: 'Request Duration',
|
||||
ttftLabel: 'TTFT (first_token_ms)',
|
||||
p50: 'p50:',
|
||||
p90: 'p90:',
|
||||
@@ -2590,7 +2590,7 @@ export default {
|
||||
errors: 'Error statistics, including total errors, error rate, and upstream error rate.',
|
||||
upstreamErrors: 'Upstream error statistics, excluding rate limit errors (429/529).',
|
||||
latency: 'Request duration statistics, including p50, p90, p95, p99 percentiles.',
|
||||
ttft: 'Time To First Token, measuring the speed of first byte return in streaming responses.',
|
||||
ttft: 'Time To First Token, measuring the speed of first token return in streaming responses.',
|
||||
health: 'System health score (0-100), considering SLA, error rate, and resource usage.'
|
||||
},
|
||||
charts: {
|
||||
|
||||
@@ -2048,6 +2048,7 @@ export default {
|
||||
lastRun: '最近运行',
|
||||
lastSuccess: '最近成功',
|
||||
lastError: '最近错误',
|
||||
result: '结果',
|
||||
noData: '暂无数据',
|
||||
loadingText: '加载中...',
|
||||
ready: '就绪',
|
||||
@@ -2062,7 +2063,7 @@ export default {
|
||||
avgQps: '平均 QPS',
|
||||
avgTps: '平均 TPS',
|
||||
avgLatency: '平均请求时长',
|
||||
avgTtft: '平均首字延迟',
|
||||
avgTtft: '平均首 Token 延迟',
|
||||
exceptions: '异常数',
|
||||
requestErrors: '请求错误',
|
||||
errorCount: '错误数',
|
||||
@@ -2073,8 +2074,8 @@ export default {
|
||||
errors: '错误',
|
||||
errorRate: '错误率:',
|
||||
upstreamRate: '上游错误率:',
|
||||
latencyDuration: '请求时长(毫秒)',
|
||||
ttftLabel: '首字延迟(毫秒)',
|
||||
latencyDuration: '请求时长',
|
||||
ttftLabel: '首 Token 延迟(毫秒)',
|
||||
p50: 'p50',
|
||||
p90: 'p90',
|
||||
p95: 'p95',
|
||||
@@ -2117,7 +2118,12 @@ export default {
|
||||
'6h': '近6小时',
|
||||
'24h': '近24小时',
|
||||
'7d': '近7天',
|
||||
'30d': '近30天'
|
||||
'30d': '近30天',
|
||||
custom: '自定义'
|
||||
},
|
||||
customTimeRange: {
|
||||
startTime: '开始时间',
|
||||
endTime: '结束时间'
|
||||
},
|
||||
fullscreen: {
|
||||
enter: '进入全屏'
|
||||
@@ -2146,7 +2152,7 @@ export default {
|
||||
memoryHigh: '内存使用率偏高 ({usage}%)',
|
||||
memoryHighImpact: '内存压力较大,需要关注',
|
||||
memoryHighAction: '监控内存趋势,检查是否有内存泄漏',
|
||||
ttftHigh: '首字节时间偏高 ({ttft}ms)',
|
||||
ttftHigh: '首 Token 时间偏高 ({ttft}ms)',
|
||||
ttftHighImpact: '用户感知时长增加',
|
||||
ttftHighAction: '优化请求处理流程,减少前置逻辑耗时',
|
||||
// Error rate diagnostics
|
||||
@@ -2738,7 +2744,7 @@ export default {
|
||||
sla: '服务等级协议达成率,排除业务限制(如余额不足、配额超限)的成功请求占比。',
|
||||
errors: '错误统计,包括总错误数、错误率和上游错误率。',
|
||||
latency: '请求时长统计,包括 p50、p90、p95、p99 等百分位数。',
|
||||
ttft: '首Token延迟(Time To First Token),衡量流式响应的首字节返回速度。',
|
||||
ttft: '首 Token 延迟(Time To First Token),衡量流式响应的首 Token 返回速度。',
|
||||
health: '系统健康评分(0-100),综合考虑 SLA、错误率和资源使用情况。'
|
||||
},
|
||||
charts: {
|
||||
|
||||
@@ -414,7 +414,17 @@ const handleScroll = () => {
|
||||
menu.show = false
|
||||
}
|
||||
|
||||
onMounted(async () => { load(); try { const [p, g] = await Promise.all([adminAPI.proxies.getAll(), adminAPI.groups.getAll()]); proxies.value = p; groups.value = g } catch (error) { console.error('Failed to load proxies/groups:', error) }; window.addEventListener('scroll', handleScroll, true) })
|
||||
// Mount hook: starts the main list load, then loads the proxy and group
// lists, and finally registers the scroll handler.
onMounted(async () => {
|
||||
// Not awaited: load() runs concurrently with the proxies/groups requests below.
load()
|
||||
try {
|
||||
// Both lookups are issued in parallel; if either rejects, neither ref is assigned.
const [p, g] = await Promise.all([adminAPI.proxies.getAll(), adminAPI.groups.getAll()])
|
||||
proxies.value = p
|
||||
groups.value = g
|
||||
} catch (error) {
|
||||
// Failure is only logged; the hook continues and still attaches the scroll listener.
console.error('Failed to load proxies/groups:', error)
|
||||
}
|
||||
// Third argument `true` registers the listener in the capture phase; removal
// must pass the same flag to detach it.
window.addEventListener('scroll', handleScroll, true)
|
||||
})
|
||||
|
||||
onUnmounted(() => {
|
||||
window.removeEventListener('scroll', handleScroll, true)
|
||||
|
||||
@@ -23,10 +23,13 @@
|
||||
:auto-refresh-enabled="autoRefreshEnabled"
|
||||
:auto-refresh-countdown="autoRefreshCountdown"
|
||||
:fullscreen="isFullscreen"
|
||||
:custom-start-time="customStartTime"
|
||||
:custom-end-time="customEndTime"
|
||||
@update:time-range="onTimeRangeChange"
|
||||
@update:platform="onPlatformChange"
|
||||
@update:group="onGroupChange"
|
||||
@update:query-mode="onQueryModeChange"
|
||||
@update:custom-time-range="onCustomTimeRangeChange"
|
||||
@refresh="fetchData"
|
||||
@open-request-details="handleOpenRequestDetails"
|
||||
@open-error-details="openErrorDetails"
|
||||
@@ -39,7 +42,7 @@
|
||||
<!-- Row: Concurrency + Throughput -->
|
||||
<div v-if="opsEnabled && !(loading && !hasLoadedOnce)" class="grid grid-cols-1 gap-6 lg:grid-cols-3">
|
||||
<div class="lg:col-span-1 min-h-[360px]">
|
||||
<OpsConcurrencyCard :platform-filter="platform" :group-id-filter="groupId" />
|
||||
<OpsConcurrencyCard :platform-filter="platform" :group-id-filter="groupId" :refresh-token="dashboardRefreshToken" />
|
||||
</div>
|
||||
<div class="lg:col-span-2 min-h-[360px]">
|
||||
<OpsThroughputTrendChart
|
||||
@@ -148,8 +151,8 @@ const { t } = useI18n()
|
||||
|
||||
const opsEnabled = computed(() => adminSettingsStore.opsMonitoringEnabled)
|
||||
|
||||
type TimeRange = '5m' | '30m' | '1h' | '6h' | '24h'
|
||||
const allowedTimeRanges = new Set<TimeRange>(['5m', '30m', '1h', '6h', '24h'])
|
||||
type TimeRange = '5m' | '30m' | '1h' | '6h' | '24h' | 'custom'
|
||||
const allowedTimeRanges = new Set<TimeRange>(['5m', '30m', '1h', '6h', '24h', 'custom'])
|
||||
|
||||
type QueryMode = 'auto' | 'raw' | 'preagg'
|
||||
const allowedQueryModes = new Set<QueryMode>(['auto', 'raw', 'preagg'])
|
||||
@@ -163,6 +166,8 @@ const timeRange = ref<TimeRange>('1h')
|
||||
const platform = ref<string>('')
|
||||
const groupId = ref<number | null>(null)
|
||||
const queryMode = ref<QueryMode>('auto')
|
||||
const customStartTime = ref<string | null>(null)
|
||||
const customEndTime = ref<string | null>(null)
|
||||
|
||||
const QUERY_KEYS = {
|
||||
timeRange: 'tr',
|
||||
@@ -347,23 +352,24 @@ const autoRefreshEnabled = ref(false)
|
||||
const autoRefreshIntervalMs = ref(30000) // default 30 seconds
|
||||
const autoRefreshCountdown = ref(0)
|
||||
|
||||
// Auto refresh timer
|
||||
const { pause: pauseAutoRefresh, resume: resumeAutoRefresh } = useIntervalFn(
|
||||
() => {
|
||||
if (autoRefreshEnabled.value && opsEnabled.value && !loading.value) {
|
||||
fetchData()
|
||||
}
|
||||
},
|
||||
autoRefreshIntervalMs,
|
||||
{ immediate: false }
|
||||
)
|
||||
// Used to trigger child component refreshes in a single shared cadence.
|
||||
const dashboardRefreshToken = ref(0)
|
||||
|
||||
// Countdown timer (updates every second)
|
||||
// Countdown timer (drives auto refresh; updates every second)
|
||||
const { pause: pauseCountdown, resume: resumeCountdown } = useIntervalFn(
|
||||
() => {
|
||||
if (autoRefreshEnabled.value && autoRefreshCountdown.value > 0) {
|
||||
autoRefreshCountdown.value--
|
||||
if (!autoRefreshEnabled.value) return
|
||||
if (!opsEnabled.value) return
|
||||
if (loading.value) return
|
||||
|
||||
if (autoRefreshCountdown.value <= 0) {
|
||||
// Fetch immediately when the countdown reaches 0.
|
||||
// fetchData() will reset the countdown to the full interval.
|
||||
fetchData()
|
||||
return
|
||||
}
|
||||
|
||||
autoRefreshCountdown.value -= 1
|
||||
},
|
||||
1000,
|
||||
{ immediate: false }
|
||||
@@ -420,6 +426,11 @@ function onTimeRangeChange(v: string | number | boolean | null) {
|
||||
timeRange.value = v as TimeRange
|
||||
}
|
||||
|
||||
/**
 * Stores the bounds of a user-selected custom time range.
 *
 * This handler only writes the two refs; it does not change `timeRange`
 * and does not trigger a fetch itself.
 * NOTE(review): the expected string format (e.g. ISO 8601) is not visible
 * here - confirm against the emitting component and the backend.
 *
 * @param startTime - start of the custom range
 * @param endTime - end of the custom range
 */
function onCustomTimeRangeChange(startTime: string, endTime: string) {
|
||||
customStartTime.value = startTime
|
||||
customEndTime.value = endTime
|
||||
}
|
||||
|
||||
function onSettingsSaved() {
|
||||
loadThresholds()
|
||||
fetchData()
|
||||
@@ -458,18 +469,32 @@ function openError(id: number) {
|
||||
showErrorModal.value = true
|
||||
}
|
||||
|
||||
/**
 * Builds the query parameters for the dashboard data requests from the
 * current filter state.
 *
 * Always includes `platform`, `group_id` and `mode`, plus exactly one time
 * selector:
 *  - `start_time` + `end_time` when the range is 'custom' and both bounds are set;
 *  - `time_range: '1h'` when the range is 'custom' but a bound is missing;
 *  - `time_range: <selected preset>` otherwise.
 *
 * @returns a plain params object (typed `any`)
 */
function buildApiParams() {
|
||||
const params: any = {
|
||||
// An empty platform string becomes undefined.
platform: platform.value || undefined,
|
||||
// Only null/undefined become undefined; a group id of 0 is kept (`??`, not `||`).
group_id: groupId.value ?? undefined,
|
||||
mode: queryMode.value
|
||||
}
|
||||
|
||||
if (timeRange.value === 'custom') {
|
||||
if (customStartTime.value && customEndTime.value) {
|
||||
params.start_time = customStartTime.value
|
||||
params.end_time = customEndTime.value
|
||||
} else {
|
||||
// Safety fallback: avoid sending time_range=custom (backend may not support it)
|
||||
params.time_range = '1h'
|
||||
}
|
||||
} else {
|
||||
params.time_range = timeRange.value
|
||||
}
|
||||
|
||||
return params
|
||||
}
|
||||
|
||||
async function refreshOverviewWithCancel(fetchSeq: number, signal: AbortSignal) {
|
||||
if (!opsEnabled.value) return
|
||||
try {
|
||||
const data = await opsAPI.getDashboardOverview(
|
||||
{
|
||||
time_range: timeRange.value,
|
||||
platform: platform.value || undefined,
|
||||
group_id: groupId.value ?? undefined,
|
||||
mode: queryMode.value
|
||||
},
|
||||
{ signal }
|
||||
)
|
||||
const data = await opsAPI.getDashboardOverview(buildApiParams(), { signal })
|
||||
if (fetchSeq !== dashboardFetchSeq) return
|
||||
overview.value = data
|
||||
} catch (err: any) {
|
||||
@@ -483,15 +508,7 @@ async function refreshThroughputTrendWithCancel(fetchSeq: number, signal: AbortS
|
||||
if (!opsEnabled.value) return
|
||||
loadingTrend.value = true
|
||||
try {
|
||||
const data = await opsAPI.getThroughputTrend(
|
||||
{
|
||||
time_range: timeRange.value,
|
||||
platform: platform.value || undefined,
|
||||
group_id: groupId.value ?? undefined,
|
||||
mode: queryMode.value
|
||||
},
|
||||
{ signal }
|
||||
)
|
||||
const data = await opsAPI.getThroughputTrend(buildApiParams(), { signal })
|
||||
if (fetchSeq !== dashboardFetchSeq) return
|
||||
throughputTrend.value = data
|
||||
} catch (err: any) {
|
||||
@@ -509,15 +526,7 @@ async function refreshLatencyHistogramWithCancel(fetchSeq: number, signal: Abort
|
||||
if (!opsEnabled.value) return
|
||||
loadingLatency.value = true
|
||||
try {
|
||||
const data = await opsAPI.getLatencyHistogram(
|
||||
{
|
||||
time_range: timeRange.value,
|
||||
platform: platform.value || undefined,
|
||||
group_id: groupId.value ?? undefined,
|
||||
mode: queryMode.value
|
||||
},
|
||||
{ signal }
|
||||
)
|
||||
const data = await opsAPI.getLatencyHistogram(buildApiParams(), { signal })
|
||||
if (fetchSeq !== dashboardFetchSeq) return
|
||||
latencyHistogram.value = data
|
||||
} catch (err: any) {
|
||||
@@ -535,15 +544,7 @@ async function refreshErrorTrendWithCancel(fetchSeq: number, signal: AbortSignal
|
||||
if (!opsEnabled.value) return
|
||||
loadingErrorTrend.value = true
|
||||
try {
|
||||
const data = await opsAPI.getErrorTrend(
|
||||
{
|
||||
time_range: timeRange.value,
|
||||
platform: platform.value || undefined,
|
||||
group_id: groupId.value ?? undefined,
|
||||
mode: queryMode.value
|
||||
},
|
||||
{ signal }
|
||||
)
|
||||
const data = await opsAPI.getErrorTrend(buildApiParams(), { signal })
|
||||
if (fetchSeq !== dashboardFetchSeq) return
|
||||
errorTrend.value = data
|
||||
} catch (err: any) {
|
||||
@@ -561,15 +562,7 @@ async function refreshErrorDistributionWithCancel(fetchSeq: number, signal: Abor
|
||||
if (!opsEnabled.value) return
|
||||
loadingErrorDistribution.value = true
|
||||
try {
|
||||
const data = await opsAPI.getErrorDistribution(
|
||||
{
|
||||
time_range: timeRange.value,
|
||||
platform: platform.value || undefined,
|
||||
group_id: groupId.value ?? undefined,
|
||||
mode: queryMode.value
|
||||
},
|
||||
{ signal }
|
||||
)
|
||||
const data = await opsAPI.getErrorDistribution(buildApiParams(), { signal })
|
||||
if (fetchSeq !== dashboardFetchSeq) return
|
||||
errorDistribution.value = data
|
||||
} catch (err: any) {
|
||||
@@ -612,7 +605,12 @@ async function fetchData() {
|
||||
refreshErrorDistributionWithCancel(fetchSeq, dashboardFetchController.signal)
|
||||
])
|
||||
if (fetchSeq !== dashboardFetchSeq) return
|
||||
|
||||
lastUpdated.value = new Date()
|
||||
|
||||
// Trigger child component refreshes using the same cadence as the header.
|
||||
dashboardRefreshToken.value += 1
|
||||
|
||||
// Reset auto refresh countdown after successful fetch
|
||||
if (autoRefreshEnabled.value) {
|
||||
autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
|
||||
@@ -686,15 +684,14 @@ onMounted(async () => {
|
||||
|
||||
// Start auto refresh if enabled
|
||||
if (autoRefreshEnabled.value) {
|
||||
resumeAutoRefresh()
|
||||
resumeCountdown()
|
||||
}
|
||||
})
|
||||
|
||||
async function loadThresholds() {
|
||||
try {
|
||||
const settings = await opsAPI.getAlertRuntimeSettings()
|
||||
metricThresholds.value = settings.thresholds || null
|
||||
const thresholds = await opsAPI.getMetricThresholds()
|
||||
metricThresholds.value = thresholds || null
|
||||
} catch (err) {
|
||||
console.warn('[OpsDashboard] Failed to load thresholds', err)
|
||||
metricThresholds.value = null
|
||||
@@ -704,7 +701,6 @@ async function loadThresholds() {
|
||||
onUnmounted(() => {
|
||||
window.removeEventListener('keydown', handleKeydown)
|
||||
abortDashboardFetch()
|
||||
pauseAutoRefresh()
|
||||
pauseCountdown()
|
||||
})
|
||||
|
||||
@@ -712,10 +708,8 @@ onUnmounted(() => {
|
||||
watch(autoRefreshEnabled, (enabled) => {
|
||||
if (enabled) {
|
||||
autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
|
||||
resumeAutoRefresh()
|
||||
resumeCountdown()
|
||||
} else {
|
||||
pauseAutoRefresh()
|
||||
pauseCountdown()
|
||||
autoRefreshCountdown.value = 0
|
||||
}
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
<script setup lang="ts">
|
||||
import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
|
||||
import { computed, ref, watch } from 'vue'
|
||||
import { useI18n } from 'vue-i18n'
|
||||
import { useIntervalFn } from '@vueuse/core'
|
||||
import { opsAPI, type OpsAccountAvailabilityStatsResponse, type OpsConcurrencyStatsResponse } from '@/api/admin/ops'
|
||||
|
||||
interface Props {
|
||||
platformFilter?: string
|
||||
groupIdFilter?: number | null
|
||||
refreshToken: number
|
||||
}
|
||||
|
||||
const props = withDefaults(defineProps<Props>(), {
|
||||
@@ -233,15 +233,13 @@ async function loadData() {
|
||||
}
|
||||
}
|
||||
|
||||
// 定期刷新(5秒)
|
||||
const { pause: pauseRefresh, resume: resumeRefresh } = useIntervalFn(
|
||||
// 刷新节奏由父组件统一控制(OpsDashboard Header 的刷新状态/倒计时)
|
||||
watch(
|
||||
() => props.refreshToken,
|
||||
() => {
|
||||
if (realtimeEnabled.value) {
|
||||
loadData()
|
||||
}
|
||||
},
|
||||
5000,
|
||||
{ immediate: false }
|
||||
if (!realtimeEnabled.value) return
|
||||
loadData()
|
||||
}
|
||||
)
|
||||
|
||||
function getLoadBarClass(loadPct: number): string {
|
||||
@@ -271,23 +269,15 @@ function formatDuration(seconds: number): string {
|
||||
return `${hours}h`
|
||||
}
|
||||
|
||||
onMounted(() => {
|
||||
loadData()
|
||||
resumeRefresh()
|
||||
})
|
||||
|
||||
onUnmounted(() => {
|
||||
pauseRefresh()
|
||||
})
|
||||
|
||||
watch(realtimeEnabled, async (enabled) => {
|
||||
if (!enabled) {
|
||||
pauseRefresh()
|
||||
} else {
|
||||
resumeRefresh()
|
||||
await loadData()
|
||||
}
|
||||
})
|
||||
// Loads data whenever `realtimeEnabled` is true: once when this watcher is
// created (because of `immediate: true`) and again each time the value
// changes to true. Nothing is done when it changes to false.
watch(
|
||||
() => realtimeEnabled.value,
|
||||
async (enabled) => {
|
||||
if (enabled) {
|
||||
await loadData()
|
||||
}
|
||||
},
|
||||
// Run the callback right away with the current value instead of waiting for a change.
{ immediate: true }
|
||||
)
|
||||
</script>
|
||||
|
||||
<template>
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
<script setup lang="ts">
|
||||
import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
|
||||
import { useIntervalFn } from '@vueuse/core'
|
||||
import { computed, onMounted, ref, watch } from 'vue'
|
||||
import { useI18n } from 'vue-i18n'
|
||||
import Select from '@/components/common/Select.vue'
|
||||
import HelpTooltip from '@/components/common/HelpTooltip.vue'
|
||||
@@ -26,6 +25,8 @@ interface Props {
|
||||
autoRefreshEnabled?: boolean
|
||||
autoRefreshCountdown?: number
|
||||
fullscreen?: boolean
|
||||
customStartTime?: string | null
|
||||
customEndTime?: string | null
|
||||
}
|
||||
|
||||
interface Emits {
|
||||
@@ -33,6 +34,7 @@ interface Emits {
|
||||
(e: 'update:group', value: number | null): void
|
||||
(e: 'update:timeRange', value: string): void
|
||||
(e: 'update:queryMode', value: string): void
|
||||
(e: 'update:customTimeRange', startTime: string, endTime: string): void
|
||||
(e: 'refresh'): void
|
||||
(e: 'openRequestDetails', preset?: OpsRequestDetailsPreset): void
|
||||
(e: 'openErrorDetails', kind: 'request' | 'upstream'): void
|
||||
@@ -85,6 +87,23 @@ watch(
|
||||
|
||||
// --- Filters ---
|
||||
|
||||
const showCustomTimeRangeDialog = ref(false)
|
||||
const customStartTimeInput = ref('')
|
||||
const customEndTimeInput = ref('')
|
||||
|
||||
/**
 * Builds the short label shown for a custom time range.
 *
 * Both arguments are passed to `new Date(...)`; each side is rendered as
 * `MM-DD HH:mm` from the local-time getters and the two are joined with " ~ ".
 *
 * @param startTime date string for the start of the range
 * @param endTime date string for the end of the range
 * @returns label of the form `MM-DD HH:mm ~ MM-DD HH:mm`
 */
function formatCustomTimeRangeLabel(startTime: string, endTime: string): string {
  // Zero-pad a numeric date component to two characters.
  const twoDigits = (n: number): string => String(n).padStart(2, '0')

  const render = (raw: string): string => {
    const d = new Date(raw)
    const datePart = `${twoDigits(d.getMonth() + 1)}-${twoDigits(d.getDate())}`
    const timePart = `${twoDigits(d.getHours())}:${twoDigits(d.getMinutes())}`
    return `${datePart} ${timePart}`
  }

  return `${render(startTime)} ~ ${render(endTime)}`
}
|
||||
|
||||
const groups = ref<Array<{ id: number; name: string; platform: string }>>([])
|
||||
|
||||
const platformOptions = computed(() => [
|
||||
@@ -100,7 +119,13 @@ const timeRangeOptions = computed(() => [
|
||||
{ value: '30m', label: t('admin.ops.timeRange.30m') },
|
||||
{ value: '1h', label: t('admin.ops.timeRange.1h') },
|
||||
{ value: '6h', label: t('admin.ops.timeRange.6h') },
|
||||
{ value: '24h', label: t('admin.ops.timeRange.24h') }
|
||||
{ value: '24h', label: t('admin.ops.timeRange.24h') },
|
||||
{
|
||||
value: 'custom',
|
||||
label: props.timeRange === 'custom' && props.customStartTime && props.customEndTime
|
||||
? `${t('admin.ops.timeRange.custom')} (${formatCustomTimeRangeLabel(props.customStartTime, props.customEndTime)})`
|
||||
: t('admin.ops.timeRange.custom')
|
||||
}
|
||||
])
|
||||
|
||||
const queryModeOptions = computed(() => [
|
||||
@@ -149,7 +174,34 @@ function handleGroupChange(val: string | number | boolean | null) {
|
||||
}
|
||||
|
||||
/**
 * Handles a change of the time-range selector.
 *
 * - Any preset value is forwarded to the parent via `update:timeRange`
 *   (an empty/falsy value falls back to `'1h'`).
 * - `'custom'` is NOT emitted here: it only opens the custom time range
 *   dialog. The switch to `'custom'` is emitted by the confirm handler,
 *   after the custom range itself, so cancelling the dialog leaves the
 *   current selection untouched.
 *
 * @param val raw value coming from the Select component
 */
function handleTimeRangeChange(val: string | number | boolean | null) {
  const newValue = String(val || '1h')

  if (newValue !== 'custom') {
    emit('update:timeRange', newValue)
    return
  }

  // <input type="datetime-local"> holds a local wall-clock value
  // ("YYYY-MM-DDTHH:mm") and the confirm handler parses it as local time.
  // toISOString() would yield UTC and shift the default window by the
  // timezone offset, so the value is built from the local getters instead.
  const pad = (n: number): string => String(n).padStart(2, '0')
  const toLocalInputValue = (d: Date): string =>
    `${d.getFullYear()}-${pad(d.getMonth() + 1)}-${pad(d.getDate())}T${pad(d.getHours())}:${pad(d.getMinutes())}`

  // Default the dialog to the most recent hour.
  const now = new Date()
  const oneHourAgo = new Date(now.getTime() - 60 * 60 * 1000)
  customStartTimeInput.value = toLocalInputValue(oneHourAgo)
  customEndTimeInput.value = toLocalInputValue(now)
  showCustomTimeRangeDialog.value = true
}
|
||||
|
||||
/**
 * Confirms the custom time range dialog.
 *
 * Converts both dialog inputs to ISO-8601 strings and notifies the parent.
 * Nothing is emitted and the dialog stays open when either input is empty,
 * cannot be parsed as a date, or when the start is not strictly before the end.
 */
function handleCustomTimeRangeConfirm() {
  if (!customStartTimeInput.value || !customEndTimeInput.value) return

  const start = new Date(customStartTimeInput.value)
  const end = new Date(customEndTimeInput.value)

  // Date.prototype.toISOString() throws a RangeError on an invalid Date.
  if (Number.isNaN(start.getTime()) || Number.isNaN(end.getTime())) return
  // A reversed or empty window is not a usable query range.
  if (start.getTime() >= end.getTime()) return

  // Emit custom time range first so the parent can build correct API params
  // when it reacts to timeRange switching to "custom".
  emit('update:customTimeRange', start.toISOString(), end.toISOString())
  emit('update:timeRange', 'custom')
  showCustomTimeRangeDialog.value = false
}
|
||||
|
||||
/** Closes the custom time range dialog; emits nothing. */
function handleCustomTimeRangeCancel() {
|
||||
showCustomTimeRangeDialog.value = false
|
||||
// If the current range is not "custom", nothing needs to be done.
|
||||
// If the current range is "custom", it is left unchanged.
|
||||
}
|
||||
|
||||
function handleQueryModeChange(val: string | number | boolean | null) {
|
||||
@@ -164,47 +216,60 @@ function openErrorDetails(kind: 'request' | 'upstream') {
|
||||
emit('openErrorDetails', kind)
|
||||
}
|
||||
|
||||
const updatedAtLabel = computed(() => {
|
||||
if (!props.lastUpdated) return t('common.unknown')
|
||||
return props.lastUpdated.toLocaleTimeString()
|
||||
})
|
||||
|
||||
// --- Color coding for TTFT ---
|
||||
/**
 * Maps a TTFT value in milliseconds to a text color class.
 *
 * - null / undefined: neutral gray/white
 * - below 500: green
 * - below 1000: yellow
 * - below 2000: orange
 * - anything else: red
 *
 * @param ms TTFT in milliseconds, or null/undefined when unavailable
 * @returns the Tailwind text color classes (light and dark variants)
 */
function getTTFTColor(ms: number | null | undefined): string {
  if (ms === null || ms === undefined) return 'text-gray-900 dark:text-white'

  // Exclusive upper bounds, checked in ascending order.
  const bands: Array<[number, string]> = [
    [500, 'text-green-600 dark:text-green-400'],
    [1000, 'text-yellow-600 dark:text-yellow-400'],
    [2000, 'text-orange-600 dark:text-orange-400']
  ]

  for (const [upperBound, colorClass] of bands) {
    if (ms < upperBound) return colorClass
  }
  return 'text-red-600 dark:text-red-400'
}
|
||||
|
||||
// --- Threshold checking helpers ---
|
||||
function isSLABelowThreshold(slaPercent: number | null): boolean {
|
||||
if (slaPercent == null) return false
|
||||
type ThresholdLevel = 'normal' | 'warning' | 'critical'
|
||||
|
||||
function getSLAThresholdLevel(slaPercent: number | null): ThresholdLevel {
|
||||
if (slaPercent == null) return 'normal'
|
||||
const threshold = props.thresholds?.sla_percent_min
|
||||
if (threshold == null) return false
|
||||
return slaPercent < threshold
|
||||
if (threshold == null) return 'normal'
|
||||
|
||||
// SLA is "higher is better":
|
||||
// - below threshold => critical
|
||||
// - within +0.1% buffer => warning
|
||||
const warningBuffer = 0.1
|
||||
|
||||
if (slaPercent < threshold) return 'critical'
|
||||
if (slaPercent < threshold + warningBuffer) return 'warning'
|
||||
return 'normal'
|
||||
}
|
||||
|
||||
function isTTFTAboveThreshold(ttftP99Ms: number | null): boolean {
|
||||
if (ttftP99Ms == null) return false
|
||||
function getTTFTThresholdLevel(ttftMs: number | null): ThresholdLevel {
|
||||
if (ttftMs == null) return 'normal'
|
||||
const threshold = props.thresholds?.ttft_p99_ms_max
|
||||
if (threshold == null) return false
|
||||
return ttftP99Ms > threshold
|
||||
if (threshold == null) return 'normal'
|
||||
if (ttftMs >= threshold) return 'critical'
|
||||
if (ttftMs >= threshold * 0.8) return 'warning'
|
||||
return 'normal'
|
||||
}
|
||||
|
||||
function isRequestErrorRateAboveThreshold(errorRatePercent: number | null): boolean {
|
||||
if (errorRatePercent == null) return false
|
||||
function getRequestErrorRateThresholdLevel(errorRatePercent: number | null): ThresholdLevel {
|
||||
if (errorRatePercent == null) return 'normal'
|
||||
const threshold = props.thresholds?.request_error_rate_percent_max
|
||||
if (threshold == null) return false
|
||||
return errorRatePercent > threshold
|
||||
if (threshold == null) return 'normal'
|
||||
if (errorRatePercent >= threshold) return 'critical'
|
||||
if (errorRatePercent >= threshold * 0.8) return 'warning'
|
||||
return 'normal'
|
||||
}
|
||||
|
||||
function isUpstreamErrorRateAboveThreshold(upstreamErrorRatePercent: number | null): boolean {
|
||||
if (upstreamErrorRatePercent == null) return false
|
||||
function getUpstreamErrorRateThresholdLevel(upstreamErrorRatePercent: number | null): ThresholdLevel {
|
||||
if (upstreamErrorRatePercent == null) return 'normal'
|
||||
const threshold = props.thresholds?.upstream_error_rate_percent_max
|
||||
if (threshold == null) return false
|
||||
return upstreamErrorRatePercent > threshold
|
||||
if (threshold == null) return 'normal'
|
||||
if (upstreamErrorRatePercent >= threshold) return 'critical'
|
||||
if (upstreamErrorRatePercent >= threshold * 0.8) return 'warning'
|
||||
return 'normal'
|
||||
}
|
||||
|
||||
/**
 * Maps a threshold level to a text color class.
 *
 * @param level threshold level to render
 * @returns red classes for 'critical', yellow for 'warning', green for
 *          every other value
 */
function getThresholdColorClass(level: ThresholdLevel): string {
  if (level === 'critical') return 'text-red-600 dark:text-red-400'
  if (level === 'warning') return 'text-yellow-600 dark:text-yellow-400'
  return 'text-green-600 dark:text-green-400'
}
|
||||
|
||||
// --- Realtime / Overview labels ---
|
||||
@@ -257,31 +322,33 @@ watch(
|
||||
{ immediate: true }
|
||||
)
|
||||
|
||||
const { pause: pauseRealtimeTrafficRefresh, resume: resumeRealtimeTrafficRefresh } = useIntervalFn(
|
||||
() => {
|
||||
loadRealtimeTrafficSummary()
|
||||
},
|
||||
5000,
|
||||
{ immediate: false }
|
||||
)
|
||||
|
||||
watch(
|
||||
() => adminSettingsStore.opsRealtimeMonitoringEnabled,
|
||||
(enabled) => {
|
||||
if (enabled) {
|
||||
resumeRealtimeTrafficRefresh()
|
||||
} else {
|
||||
pauseRealtimeTrafficRefresh()
|
||||
if (!enabled) {
|
||||
// Keep UI stable when realtime monitoring is turned off.
|
||||
realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary()
|
||||
} else {
|
||||
loadRealtimeTrafficSummary()
|
||||
}
|
||||
},
|
||||
{ immediate: true }
|
||||
)
|
||||
|
||||
onUnmounted(() => {
|
||||
pauseRealtimeTrafficRefresh()
|
||||
})
|
||||
// Realtime traffic refresh follows the parent (OpsDashboard) refresh cadence.
|
||||
watch(
|
||||
() => [props.autoRefreshEnabled, props.autoRefreshCountdown, props.loading] as const,
|
||||
([enabled, countdown, loading]) => {
|
||||
if (!enabled) return
|
||||
if (loading) return
|
||||
// Treat countdown reset (or reaching 0) as a refresh boundary.
|
||||
if (countdown === 0) {
|
||||
loadRealtimeTrafficSummary()
|
||||
}
|
||||
}
|
||||
)
|
||||
|
||||
// no-op: parent controls refresh cadence
|
||||
|
||||
const displayRealTimeQps = computed(() => {
|
||||
const v = realtimeTrafficSummary.value?.qps?.current
|
||||
@@ -817,25 +884,11 @@ function handleToolbarRefresh() {
|
||||
</span>
|
||||
|
||||
<span>·</span>
|
||||
<span>{{ t('common.refresh') }}: {{ updatedAtLabel }}</span>
|
||||
<span>{{ t('common.refresh') }}: {{ props.lastUpdated ? props.lastUpdated.toLocaleString('zh-CN', { year: 'numeric', month: '2-digit', day: '2-digit', hour: '2-digit', minute: '2-digit', second: '2-digit' }).replace(/\//g, '-') : t('common.unknown') }}</span>
|
||||
|
||||
<template v-if="props.autoRefreshEnabled && props.autoRefreshCountdown !== undefined">
|
||||
<span>·</span>
|
||||
<span class="flex items-center gap-1">
|
||||
<svg class="h-3 w-3 animate-spin text-blue-500" fill="none" viewBox="0 0 24 24">
|
||||
<circle class="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" stroke-width="4"></circle>
|
||||
<path class="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
|
||||
</svg>
|
||||
<span>{{ t('admin.ops.settings.autoRefreshCountdown', { seconds: props.autoRefreshCountdown }) }}</span>
|
||||
</span>
|
||||
</template>
|
||||
|
||||
<template v-if="systemMetrics">
|
||||
<span>·</span>
|
||||
<span>
|
||||
{{ t('admin.ops.collectedAt') }} {{ formatTimeShort(systemMetrics.created_at) }}
|
||||
({{ t('admin.ops.window') }} {{ systemMetrics.window_minutes }}m)
|
||||
</span>
|
||||
<span>剩余 {{ props.autoRefreshCountdown }}s</span>
|
||||
</template>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1197,7 +1250,7 @@ function handleToolbarRefresh() {
|
||||
<div class="flex items-center gap-2">
|
||||
<span class="text-[10px] font-bold uppercase text-gray-400">{{ t('admin.ops.sla') }}</span>
|
||||
<HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.sla')" />
|
||||
<span class="h-1.5 w-1.5 rounded-full" :class="isSLABelowThreshold(slaPercent) ? 'bg-red-500' : (slaPercent ?? 0) >= 99.5 ? 'bg-green-500' : 'bg-yellow-500'"></span>
|
||||
<span class="h-1.5 w-1.5 rounded-full" :class="getSLAThresholdLevel(slaPercent) === 'critical' ? 'bg-red-500' : getSLAThresholdLevel(slaPercent) === 'warning' ? 'bg-yellow-500' : 'bg-green-500'"></span>
|
||||
</div>
|
||||
<button
|
||||
v-if="!props.fullscreen"
|
||||
@@ -1208,11 +1261,11 @@ function handleToolbarRefresh() {
|
||||
{{ t('admin.ops.requestDetails.details') }}
|
||||
</button>
|
||||
</div>
|
||||
<div class="mt-2 text-3xl font-black" :class="isSLABelowThreshold(slaPercent) ? 'text-red-600 dark:text-red-400' : 'text-gray-900 dark:text-white'">
|
||||
<div class="mt-2 text-3xl font-black" :class="getThresholdColorClass(getSLAThresholdLevel(slaPercent))">
|
||||
{{ slaPercent == null ? '-' : `${slaPercent.toFixed(3)}%` }}
|
||||
</div>
|
||||
<div class="mt-3 h-2 w-full overflow-hidden rounded-full bg-gray-200 dark:bg-dark-700">
|
||||
<div class="h-full transition-all" :class="isSLABelowThreshold(slaPercent) ? 'bg-red-500' : 'bg-green-500'" :style="{ width: `${Math.max((slaPercent ?? 0) - 90, 0) * 10}%` }"></div>
|
||||
<div class="h-full transition-all" :class="getSLAThresholdLevel(slaPercent) === 'critical' ? 'bg-red-500' : getSLAThresholdLevel(slaPercent) === 'warning' ? 'bg-yellow-500' : 'bg-green-500'" :style="{ width: `${Math.max((slaPercent ?? 0) - 90, 0) * 10}%` }"></div>
|
||||
</div>
|
||||
<div class="mt-3 text-xs">
|
||||
<div class="flex justify-between">
|
||||
@@ -1233,7 +1286,7 @@ function handleToolbarRefresh() {
|
||||
v-if="!props.fullscreen"
|
||||
class="text-[10px] font-bold text-blue-500 hover:underline"
|
||||
type="button"
|
||||
@click="openDetails({ title: t('admin.ops.latencyDuration'), sort: 'duration_desc', min_duration_ms: Math.max(Number(durationP99Ms ?? 0), 0) })"
|
||||
@click="openDetails({ title: t('admin.ops.latencyDuration'), sort: 'duration_desc' })"
|
||||
>
|
||||
{{ t('admin.ops.requestDetails.details') }}
|
||||
</button>
|
||||
@@ -1244,28 +1297,28 @@ function handleToolbarRefresh() {
|
||||
</div>
|
||||
<span class="text-xs font-bold text-gray-400">ms (P99)</span>
|
||||
</div>
|
||||
<div class="mt-3 flex flex-wrap gap-x-3 gap-y-1 text-xs">
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">{{ t('admin.ops.p95') }}</span>
|
||||
<div class="mt-3 grid grid-cols-1 gap-x-3 gap-y-1 text-xs 2xl:grid-cols-2">
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">P95:</span>
|
||||
<span class="font-bold text-gray-900 dark:text-white">{{ durationP95Ms ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">{{ t('admin.ops.p90') }}</span>
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">P90:</span>
|
||||
<span class="font-bold text-gray-900 dark:text-white">{{ durationP90Ms ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">{{ t('admin.ops.p50') }}</span>
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">P50:</span>
|
||||
<span class="font-bold text-gray-900 dark:text-white">{{ durationP50Ms ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">Avg:</span>
|
||||
<span class="font-bold text-gray-900 dark:text-white">{{ durationAvgMs ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">Max:</span>
|
||||
<span class="font-bold text-gray-900 dark:text-white">{{ durationMaxMs ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
@@ -1290,35 +1343,35 @@ function handleToolbarRefresh() {
|
||||
</button>
|
||||
</div>
|
||||
<div class="mt-2 flex items-baseline gap-2">
|
||||
<div class="text-3xl font-black" :class="isTTFTAboveThreshold(ttftP99Ms) ? 'text-red-600 dark:text-red-400' : getTTFTColor(ttftP99Ms)">
|
||||
<div class="text-3xl font-black" :class="getThresholdColorClass(getTTFTThresholdLevel(ttftP99Ms))">
|
||||
{{ ttftP99Ms ?? '-' }}
|
||||
</div>
|
||||
<span class="text-xs font-bold text-gray-400">ms (P99)</span>
|
||||
</div>
|
||||
<div class="mt-3 flex flex-wrap gap-x-3 gap-y-1 text-xs">
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">{{ t('admin.ops.p95') }}</span>
|
||||
<span class="font-bold" :class="getTTFTColor(ttftP95Ms)">{{ ttftP95Ms ?? '-' }}</span>
|
||||
<div class="mt-3 grid grid-cols-1 gap-x-3 gap-y-1 text-xs 2xl:grid-cols-2">
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">P95:</span>
|
||||
<span class="font-bold" :class="getThresholdColorClass(getTTFTThresholdLevel(ttftP95Ms))">{{ ttftP95Ms ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">{{ t('admin.ops.p90') }}</span>
|
||||
<span class="font-bold" :class="getTTFTColor(ttftP90Ms)">{{ ttftP90Ms ?? '-' }}</span>
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">P90:</span>
|
||||
<span class="font-bold" :class="getThresholdColorClass(getTTFTThresholdLevel(ttftP90Ms))">{{ ttftP90Ms ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">{{ t('admin.ops.p50') }}</span>
|
||||
<span class="font-bold" :class="getTTFTColor(ttftP50Ms)">{{ ttftP50Ms ?? '-' }}</span>
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">P50:</span>
|
||||
<span class="font-bold" :class="getThresholdColorClass(getTTFTThresholdLevel(ttftP50Ms))">{{ ttftP50Ms ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">Avg:</span>
|
||||
<span class="font-bold" :class="getTTFTColor(ttftAvgMs)">{{ ttftAvgMs ?? '-' }}</span>
|
||||
<span class="font-bold" :class="getThresholdColorClass(getTTFTThresholdLevel(ttftAvgMs))">{{ ttftAvgMs ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
<div class="flex min-w-[60px] items-baseline gap-1 whitespace-nowrap">
|
||||
<div class="flex items-baseline gap-1 whitespace-nowrap">
|
||||
<span class="text-gray-500">Max:</span>
|
||||
<span class="font-bold" :class="getTTFTColor(ttftMaxMs)">{{ ttftMaxMs ?? '-' }}</span>
|
||||
<span class="font-bold" :class="getThresholdColorClass(getTTFTThresholdLevel(ttftMaxMs))">{{ ttftMaxMs ?? '-' }}</span>
|
||||
<span class="text-gray-400">ms</span>
|
||||
</div>
|
||||
</div>
|
||||
@@ -1335,7 +1388,7 @@ function handleToolbarRefresh() {
|
||||
{{ t('admin.ops.requestDetails.details') }}
|
||||
</button>
|
||||
</div>
|
||||
<div class="mt-2 text-3xl font-black" :class="isRequestErrorRateAboveThreshold(errorRatePercent) ? 'text-red-600 dark:text-red-400' : (errorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
|
||||
<div class="mt-2 text-3xl font-black" :class="getThresholdColorClass(getRequestErrorRateThresholdLevel(errorRatePercent))">
|
||||
{{ errorRatePercent == null ? '-' : `${errorRatePercent.toFixed(2)}%` }}
|
||||
</div>
|
||||
<div class="mt-3 space-y-1 text-xs">
|
||||
@@ -1361,7 +1414,7 @@ function handleToolbarRefresh() {
|
||||
{{ t('admin.ops.requestDetails.details') }}
|
||||
</button>
|
||||
</div>
|
||||
<div class="mt-2 text-3xl font-black" :class="isUpstreamErrorRateAboveThreshold(upstreamErrorRatePercent) ? 'text-red-600 dark:text-red-400' : (upstreamErrorRatePercent ?? 0) > 5 ? 'text-red-500' : 'text-gray-900 dark:text-white'">
|
||||
<div class="mt-2 text-3xl font-black" :class="getThresholdColorClass(getUpstreamErrorRateThresholdLevel(upstreamErrorRatePercent))">
|
||||
{{ upstreamErrorRatePercent == null ? '-' : `${upstreamErrorRatePercent.toFixed(2)}%` }}
|
||||
</div>
|
||||
<div class="mt-3 space-y-1 text-xs">
|
||||
@@ -1398,7 +1451,7 @@ function handleToolbarRefresh() {
|
||||
<!-- MEM -->
|
||||
<div class="rounded-xl bg-gray-50 p-3 dark:bg-dark-900">
|
||||
<div class="flex items-center gap-1">
|
||||
<div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">{{ t('admin.ops.mem') }}</div>
|
||||
<div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">{{ t('admin.ops.memory') }}</div>
|
||||
<HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.memory')" />
|
||||
</div>
|
||||
<div class="mt-1 text-lg font-black" :class="memPercentClass">
|
||||
@@ -1501,7 +1554,10 @@ function handleToolbarRefresh() {
|
||||
>
|
||||
<div class="flex items-center justify-between gap-3">
|
||||
<div class="truncate text-sm font-semibold text-gray-900 dark:text-white">{{ hb.job_name }}</div>
|
||||
<div class="text-xs text-gray-500 dark:text-gray-400">{{ formatTimeShort(hb.updated_at) }}</div>
|
||||
<div class="flex items-center gap-3 text-xs text-gray-500 dark:text-gray-400">
|
||||
<span v-if="hb.last_duration_ms != null" class="font-mono">{{ hb.last_duration_ms }}ms</span>
|
||||
<span>{{ formatTimeShort(hb.updated_at) }}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="mt-2 grid grid-cols-1 gap-2 text-xs text-gray-600 dark:text-gray-300 sm:grid-cols-2">
|
||||
@@ -1511,6 +1567,9 @@ function handleToolbarRefresh() {
|
||||
<div>
|
||||
{{ t('admin.ops.lastError') }} <span class="font-mono">{{ formatTimeShort(hb.last_error_at) }}</span>
|
||||
</div>
|
||||
<div>
|
||||
{{ t('admin.ops.result') }} <span class="font-mono">{{ hb.last_result || '-' }}</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div
|
||||
@@ -1522,5 +1581,47 @@ function handleToolbarRefresh() {
|
||||
</div>
|
||||
</div>
|
||||
</BaseDialog>
|
||||
|
||||
<!-- Custom Time Range Dialog -->
|
||||
<BaseDialog :show="showCustomTimeRangeDialog" :title="t('admin.ops.timeRange.custom')" width="narrow" @close="handleCustomTimeRangeCancel">
|
||||
<div class="space-y-4">
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-gray-700 dark:text-gray-300 mb-1">
|
||||
{{ t('admin.ops.customTimeRange.startTime') }}
|
||||
</label>
|
||||
<input
|
||||
v-model="customStartTimeInput"
|
||||
type="datetime-local"
|
||||
class="w-full rounded-lg border border-gray-300 bg-white px-3 py-2 text-sm text-gray-900 focus:border-blue-500 focus:outline-none focus:ring-1 focus:ring-blue-500 dark:border-dark-600 dark:bg-dark-800 dark:text-white"
|
||||
/>
|
||||
</div>
|
||||
<div>
|
||||
<label class="block text-sm font-medium text-gray-700 dark:text-gray-300 mb-1">
|
||||
{{ t('admin.ops.customTimeRange.endTime') }}
|
||||
</label>
|
||||
<input
|
||||
v-model="customEndTimeInput"
|
||||
type="datetime-local"
|
||||
class="w-full rounded-lg border border-gray-300 bg-white px-3 py-2 text-sm text-gray-900 focus:border-blue-500 focus:outline-none focus:ring-1 focus:ring-blue-500 dark:border-dark-600 dark:bg-dark-800 dark:text-white"
|
||||
/>
|
||||
</div>
|
||||
<div class="flex justify-end gap-3 pt-2">
|
||||
<button
|
||||
type="button"
|
||||
class="rounded-lg bg-gray-100 px-4 py-2 text-sm font-medium text-gray-700 hover:bg-gray-200 dark:bg-dark-700 dark:text-gray-300 dark:hover:bg-dark-600"
|
||||
@click="handleCustomTimeRangeCancel"
|
||||
>
|
||||
{{ t('common.cancel') }}
|
||||
</button>
|
||||
<button
|
||||
type="button"
|
||||
class="rounded-lg bg-blue-500 px-4 py-2 text-sm font-medium text-white hover:bg-blue-600"
|
||||
@click="handleCustomTimeRangeConfirm"
|
||||
>
|
||||
{{ t('common.confirm') }}
|
||||
</button>
|
||||
</div>
|
||||
</div>
|
||||
</BaseDialog>
|
||||
</div>
|
||||
</template>
|
||||
|
||||
Reference in New Issue
Block a user