refactor(ops): 优化任务心跳和组件刷新机制
后端改动:
- 添加 ops_job_heartbeats.last_result 字段记录任务执行结果
- 优化告警评估器统计信息(规则数/事件数/邮件数)
- 统一各定时任务的心跳记录格式

前端改动:
- 重构 OpsConcurrencyCard 使用父组件统一控制刷新节奏
- 移除独立的 5 秒刷新定时器,改用 refreshToken 机制
- 修复 TypeScript 类型错误
This commit is contained in:
@@ -296,9 +296,10 @@ INSERT INTO ops_job_heartbeats (
|
|||||||
last_error_at,
|
last_error_at,
|
||||||
last_error,
|
last_error,
|
||||||
last_duration_ms,
|
last_duration_ms,
|
||||||
|
last_result,
|
||||||
updated_at
|
updated_at
|
||||||
) VALUES (
|
) VALUES (
|
||||||
$1,$2,$3,$4,$5,$6,NOW()
|
$1,$2,$3,$4,$5,$6,$7,NOW()
|
||||||
)
|
)
|
||||||
ON CONFLICT (job_name) DO UPDATE SET
|
ON CONFLICT (job_name) DO UPDATE SET
|
||||||
last_run_at = COALESCE(EXCLUDED.last_run_at, ops_job_heartbeats.last_run_at),
|
last_run_at = COALESCE(EXCLUDED.last_run_at, ops_job_heartbeats.last_run_at),
|
||||||
@@ -312,6 +313,10 @@ ON CONFLICT (job_name) DO UPDATE SET
|
|||||||
ELSE COALESCE(EXCLUDED.last_error, ops_job_heartbeats.last_error)
|
ELSE COALESCE(EXCLUDED.last_error, ops_job_heartbeats.last_error)
|
||||||
END,
|
END,
|
||||||
last_duration_ms = COALESCE(EXCLUDED.last_duration_ms, ops_job_heartbeats.last_duration_ms),
|
last_duration_ms = COALESCE(EXCLUDED.last_duration_ms, ops_job_heartbeats.last_duration_ms),
|
||||||
|
last_result = CASE
|
||||||
|
WHEN EXCLUDED.last_success_at IS NOT NULL THEN COALESCE(EXCLUDED.last_result, ops_job_heartbeats.last_result)
|
||||||
|
ELSE ops_job_heartbeats.last_result
|
||||||
|
END,
|
||||||
updated_at = NOW()`
|
updated_at = NOW()`
|
||||||
|
|
||||||
_, err := r.db.ExecContext(
|
_, err := r.db.ExecContext(
|
||||||
@@ -323,6 +328,7 @@ ON CONFLICT (job_name) DO UPDATE SET
|
|||||||
opsNullTime(input.LastErrorAt),
|
opsNullTime(input.LastErrorAt),
|
||||||
opsNullString(input.LastError),
|
opsNullString(input.LastError),
|
||||||
opsNullInt(input.LastDurationMs),
|
opsNullInt(input.LastDurationMs),
|
||||||
|
opsNullString(input.LastResult),
|
||||||
)
|
)
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
@@ -340,6 +346,7 @@ SELECT
|
|||||||
last_error_at,
|
last_error_at,
|
||||||
last_error,
|
last_error,
|
||||||
last_duration_ms,
|
last_duration_ms,
|
||||||
|
last_result,
|
||||||
updated_at
|
updated_at
|
||||||
FROM ops_job_heartbeats
|
FROM ops_job_heartbeats
|
||||||
ORDER BY job_name ASC`
|
ORDER BY job_name ASC`
|
||||||
@@ -359,6 +366,8 @@ ORDER BY job_name ASC`
|
|||||||
var lastError sql.NullString
|
var lastError sql.NullString
|
||||||
var lastDuration sql.NullInt64
|
var lastDuration sql.NullInt64
|
||||||
|
|
||||||
|
var lastResult sql.NullString
|
||||||
|
|
||||||
if err := rows.Scan(
|
if err := rows.Scan(
|
||||||
&item.JobName,
|
&item.JobName,
|
||||||
&lastRun,
|
&lastRun,
|
||||||
@@ -366,6 +375,7 @@ ORDER BY job_name ASC`
|
|||||||
&lastErrorAt,
|
&lastErrorAt,
|
||||||
&lastError,
|
&lastError,
|
||||||
&lastDuration,
|
&lastDuration,
|
||||||
|
&lastResult,
|
||||||
&item.UpdatedAt,
|
&item.UpdatedAt,
|
||||||
); err != nil {
|
); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -391,6 +401,10 @@ ORDER BY job_name ASC`
|
|||||||
v := lastDuration.Int64
|
v := lastDuration.Int64
|
||||||
item.LastDurationMs = &v
|
item.LastDurationMs = &v
|
||||||
}
|
}
|
||||||
|
if lastResult.Valid {
|
||||||
|
v := lastResult.String
|
||||||
|
item.LastResult = &v
|
||||||
|
}
|
||||||
|
|
||||||
out = append(out, &item)
|
out = append(out, &item)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
@@ -235,11 +236,13 @@ func (s *OpsAggregationService) aggregateHourly() {
|
|||||||
successAt := finishedAt
|
successAt := finishedAt
|
||||||
hbCtx, hbCancel := context.WithTimeout(context.Background(), 2*time.Second)
|
hbCtx, hbCancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||||
defer hbCancel()
|
defer hbCancel()
|
||||||
|
result := truncateString(fmt.Sprintf("window=%s..%s", start.Format(time.RFC3339), end.Format(time.RFC3339)), 2048)
|
||||||
_ = s.opsRepo.UpsertJobHeartbeat(hbCtx, &OpsUpsertJobHeartbeatInput{
|
_ = s.opsRepo.UpsertJobHeartbeat(hbCtx, &OpsUpsertJobHeartbeatInput{
|
||||||
JobName: opsAggHourlyJobName,
|
JobName: opsAggHourlyJobName,
|
||||||
LastRunAt: &runAt,
|
LastRunAt: &runAt,
|
||||||
LastSuccessAt: &successAt,
|
LastSuccessAt: &successAt,
|
||||||
LastDurationMs: &dur,
|
LastDurationMs: &dur,
|
||||||
|
LastResult: &result,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -331,11 +334,13 @@ func (s *OpsAggregationService) aggregateDaily() {
|
|||||||
successAt := finishedAt
|
successAt := finishedAt
|
||||||
hbCtx, hbCancel := context.WithTimeout(context.Background(), 2*time.Second)
|
hbCtx, hbCancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||||
defer hbCancel()
|
defer hbCancel()
|
||||||
|
result := truncateString(fmt.Sprintf("window=%s..%s", start.Format(time.RFC3339), end.Format(time.RFC3339)), 2048)
|
||||||
_ = s.opsRepo.UpsertJobHeartbeat(hbCtx, &OpsUpsertJobHeartbeatInput{
|
_ = s.opsRepo.UpsertJobHeartbeat(hbCtx, &OpsUpsertJobHeartbeatInput{
|
||||||
JobName: opsAggDailyJobName,
|
JobName: opsAggDailyJobName,
|
||||||
LastRunAt: &runAt,
|
LastRunAt: &runAt,
|
||||||
LastSuccessAt: &successAt,
|
LastSuccessAt: &successAt,
|
||||||
LastDurationMs: &dur,
|
LastDurationMs: &dur,
|
||||||
|
LastResult: &result,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -190,6 +190,13 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rulesTotal := len(rules)
|
||||||
|
rulesEnabled := 0
|
||||||
|
rulesEvaluated := 0
|
||||||
|
eventsCreated := 0
|
||||||
|
eventsResolved := 0
|
||||||
|
emailsSent := 0
|
||||||
|
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
safeEnd := now.Truncate(time.Minute)
|
safeEnd := now.Truncate(time.Minute)
|
||||||
if safeEnd.IsZero() {
|
if safeEnd.IsZero() {
|
||||||
@@ -205,6 +212,7 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
|||||||
if rule == nil || !rule.Enabled || rule.ID <= 0 {
|
if rule == nil || !rule.Enabled || rule.ID <= 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
rulesEnabled++
|
||||||
|
|
||||||
scopePlatform, scopeGroupID, scopeRegion := parseOpsAlertRuleScope(rule.Filters)
|
scopePlatform, scopeGroupID, scopeRegion := parseOpsAlertRuleScope(rule.Filters)
|
||||||
|
|
||||||
@@ -220,6 +228,7 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
|||||||
s.resetRuleState(rule.ID, now)
|
s.resetRuleState(rule.ID, now)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
rulesEvaluated++
|
||||||
|
|
||||||
breachedNow := compareMetric(metricValue, rule.Operator, rule.Threshold)
|
breachedNow := compareMetric(metricValue, rule.Operator, rule.Threshold)
|
||||||
required := requiredSustainedBreaches(rule.SustainedMinutes, interval)
|
required := requiredSustainedBreaches(rule.SustainedMinutes, interval)
|
||||||
@@ -278,8 +287,11 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
eventsCreated++
|
||||||
if created != nil && created.ID > 0 {
|
if created != nil && created.ID > 0 {
|
||||||
s.maybeSendAlertEmail(ctx, runtimeCfg, rule, created)
|
if s.maybeSendAlertEmail(ctx, runtimeCfg, rule, created) {
|
||||||
|
emailsSent++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -289,11 +301,14 @@ func (s *OpsAlertEvaluatorService) evaluateOnce(interval time.Duration) {
|
|||||||
resolvedAt := now
|
resolvedAt := now
|
||||||
if err := s.opsRepo.UpdateAlertEventStatus(ctx, activeEvent.ID, OpsAlertStatusResolved, &resolvedAt); err != nil {
|
if err := s.opsRepo.UpdateAlertEventStatus(ctx, activeEvent.ID, OpsAlertStatusResolved, &resolvedAt); err != nil {
|
||||||
log.Printf("[OpsAlertEvaluator] resolve event failed (event=%d): %v", activeEvent.ID, err)
|
log.Printf("[OpsAlertEvaluator] resolve event failed (event=%d): %v", activeEvent.ID, err)
|
||||||
|
} else {
|
||||||
|
eventsResolved++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt))
|
result := truncateString(fmt.Sprintf("rules=%d enabled=%d evaluated=%d created=%d resolved=%d emails_sent=%d", rulesTotal, rulesEnabled, rulesEvaluated, eventsCreated, eventsResolved, emailsSent), 2048)
|
||||||
|
s.recordHeartbeatSuccess(runAt, time.Since(startedAt), result)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *OpsAlertEvaluatorService) pruneRuleStates(rules []*OpsAlertRule) {
|
func (s *OpsAlertEvaluatorService) pruneRuleStates(rules []*OpsAlertRule) {
|
||||||
@@ -585,32 +600,32 @@ func buildOpsAlertDescription(rule *OpsAlertRule, value float64, windowMinutes i
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *OpsAlertEvaluatorService) maybeSendAlertEmail(ctx context.Context, runtimeCfg *OpsAlertRuntimeSettings, rule *OpsAlertRule, event *OpsAlertEvent) {
|
func (s *OpsAlertEvaluatorService) maybeSendAlertEmail(ctx context.Context, runtimeCfg *OpsAlertRuntimeSettings, rule *OpsAlertRule, event *OpsAlertEvent) bool {
|
||||||
if s == nil || s.emailService == nil || s.opsService == nil || event == nil || rule == nil {
|
if s == nil || s.emailService == nil || s.opsService == nil || event == nil || rule == nil {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
if event.EmailSent {
|
if event.EmailSent {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
if !rule.NotifyEmail {
|
if !rule.NotifyEmail {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
emailCfg, err := s.opsService.GetEmailNotificationConfig(ctx)
|
emailCfg, err := s.opsService.GetEmailNotificationConfig(ctx)
|
||||||
if err != nil || emailCfg == nil || !emailCfg.Alert.Enabled {
|
if err != nil || emailCfg == nil || !emailCfg.Alert.Enabled {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(emailCfg.Alert.Recipients) == 0 {
|
if len(emailCfg.Alert.Recipients) == 0 {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
if !shouldSendOpsAlertEmailByMinSeverity(strings.TrimSpace(emailCfg.Alert.MinSeverity), strings.TrimSpace(rule.Severity)) {
|
if !shouldSendOpsAlertEmailByMinSeverity(strings.TrimSpace(emailCfg.Alert.MinSeverity), strings.TrimSpace(rule.Severity)) {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
if runtimeCfg != nil && runtimeCfg.Silencing.Enabled {
|
if runtimeCfg != nil && runtimeCfg.Silencing.Enabled {
|
||||||
if isOpsAlertSilenced(time.Now().UTC(), rule, event, runtimeCfg.Silencing) {
|
if isOpsAlertSilenced(time.Now().UTC(), rule, event, runtimeCfg.Silencing) {
|
||||||
return
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -639,6 +654,7 @@ func (s *OpsAlertEvaluatorService) maybeSendAlertEmail(ctx context.Context, runt
|
|||||||
if anySent {
|
if anySent {
|
||||||
_ = s.opsRepo.UpdateAlertEventEmailSent(context.Background(), event.ID, true)
|
_ = s.opsRepo.UpdateAlertEventEmailSent(context.Background(), event.ID, true)
|
||||||
}
|
}
|
||||||
|
return anySent
|
||||||
}
|
}
|
||||||
|
|
||||||
func buildOpsAlertEmailBody(rule *OpsAlertRule, event *OpsAlertEvent) string {
|
func buildOpsAlertEmailBody(rule *OpsAlertRule, event *OpsAlertEvent) string {
|
||||||
@@ -806,7 +822,7 @@ func (s *OpsAlertEvaluatorService) maybeLogSkip(key string) {
|
|||||||
log.Printf("[OpsAlertEvaluator] leader lock held by another instance; skipping (key=%q)", key)
|
log.Printf("[OpsAlertEvaluator] leader lock held by another instance; skipping (key=%q)", key)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *OpsAlertEvaluatorService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration) {
|
func (s *OpsAlertEvaluatorService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration, result string) {
|
||||||
if s == nil || s.opsRepo == nil {
|
if s == nil || s.opsRepo == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -814,11 +830,17 @@ func (s *OpsAlertEvaluatorService) recordHeartbeatSuccess(runAt time.Time, durat
|
|||||||
durMs := duration.Milliseconds()
|
durMs := duration.Milliseconds()
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
msg := strings.TrimSpace(result)
|
||||||
|
if msg == "" {
|
||||||
|
msg = "ok"
|
||||||
|
}
|
||||||
|
msg = truncateString(msg, 2048)
|
||||||
_ = s.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
_ = s.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
||||||
JobName: opsAlertEvaluatorJobName,
|
JobName: opsAlertEvaluatorJobName,
|
||||||
LastRunAt: &runAt,
|
LastRunAt: &runAt,
|
||||||
LastSuccessAt: &now,
|
LastSuccessAt: &now,
|
||||||
LastDurationMs: &durMs,
|
LastDurationMs: &durMs,
|
||||||
|
LastResult: &msg,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -149,7 +149,7 @@ func (s *OpsCleanupService) runScheduled() {
|
|||||||
log.Printf("[OpsCleanup] cleanup failed: %v", err)
|
log.Printf("[OpsCleanup] cleanup failed: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt))
|
s.recordHeartbeatSuccess(runAt, time.Since(startedAt), counts)
|
||||||
log.Printf("[OpsCleanup] cleanup complete: %s", counts)
|
log.Printf("[OpsCleanup] cleanup complete: %s", counts)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -330,12 +330,13 @@ func (s *OpsCleanupService) tryAcquireLeaderLock(ctx context.Context) (func(), b
|
|||||||
return release, true
|
return release, true
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *OpsCleanupService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration) {
|
func (s *OpsCleanupService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration, counts opsCleanupDeletedCounts) {
|
||||||
if s == nil || s.opsRepo == nil {
|
if s == nil || s.opsRepo == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
now := time.Now().UTC()
|
now := time.Now().UTC()
|
||||||
durMs := duration.Milliseconds()
|
durMs := duration.Milliseconds()
|
||||||
|
result := truncateString(counts.String(), 2048)
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
_ = s.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
_ = s.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
||||||
@@ -343,6 +344,7 @@ func (s *OpsCleanupService) recordHeartbeatSuccess(runAt time.Time, duration tim
|
|||||||
LastRunAt: &runAt,
|
LastRunAt: &runAt,
|
||||||
LastSuccessAt: &now,
|
LastSuccessAt: &now,
|
||||||
LastDurationMs: &durMs,
|
LastDurationMs: &durMs,
|
||||||
|
LastResult: &result,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -235,6 +235,9 @@ type OpsUpsertJobHeartbeatInput struct {
|
|||||||
LastErrorAt *time.Time
|
LastErrorAt *time.Time
|
||||||
LastError *string
|
LastError *string
|
||||||
LastDurationMs *int64
|
LastDurationMs *int64
|
||||||
|
|
||||||
|
// LastResult is an optional human-readable summary of the last successful run.
|
||||||
|
LastResult *string
|
||||||
}
|
}
|
||||||
|
|
||||||
type OpsJobHeartbeat struct {
|
type OpsJobHeartbeat struct {
|
||||||
@@ -245,6 +248,7 @@ type OpsJobHeartbeat struct {
|
|||||||
LastErrorAt *time.Time `json:"last_error_at"`
|
LastErrorAt *time.Time `json:"last_error_at"`
|
||||||
LastError *string `json:"last_error"`
|
LastError *string `json:"last_error"`
|
||||||
LastDurationMs *int64 `json:"last_duration_ms"`
|
LastDurationMs *int64 `json:"last_duration_ms"`
|
||||||
|
LastResult *string `json:"last_result"`
|
||||||
|
|
||||||
UpdatedAt time.Time `json:"updated_at"`
|
UpdatedAt time.Time `json:"updated_at"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -177,6 +177,10 @@ func (s *OpsScheduledReportService) runOnce() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
reportsTotal := len(reports)
|
||||||
|
reportsDue := 0
|
||||||
|
sentAttempts := 0
|
||||||
|
|
||||||
for _, report := range reports {
|
for _, report := range reports {
|
||||||
if report == nil || !report.Enabled {
|
if report == nil || !report.Enabled {
|
||||||
continue
|
continue
|
||||||
@@ -184,14 +188,18 @@ func (s *OpsScheduledReportService) runOnce() {
|
|||||||
if report.NextRunAt.After(now) {
|
if report.NextRunAt.After(now) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
reportsDue++
|
||||||
|
|
||||||
if err := s.runReport(ctx, report, now); err != nil {
|
attempts, err := s.runReport(ctx, report, now)
|
||||||
|
if err != nil {
|
||||||
s.recordHeartbeatError(runAt, time.Since(startedAt), err)
|
s.recordHeartbeatError(runAt, time.Since(startedAt), err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
sentAttempts += attempts
|
||||||
}
|
}
|
||||||
|
|
||||||
s.recordHeartbeatSuccess(runAt, time.Since(startedAt))
|
result := truncateString(fmt.Sprintf("reports=%d due=%d send_attempts=%d", reportsTotal, reportsDue, sentAttempts), 2048)
|
||||||
|
s.recordHeartbeatSuccess(runAt, time.Since(startedAt), result)
|
||||||
}
|
}
|
||||||
|
|
||||||
type opsScheduledReport struct {
|
type opsScheduledReport struct {
|
||||||
@@ -297,9 +305,9 @@ func (s *OpsScheduledReportService) listScheduledReports(ctx context.Context, no
|
|||||||
return out
|
return out
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *OpsScheduledReportService) runReport(ctx context.Context, report *opsScheduledReport, now time.Time) error {
|
func (s *OpsScheduledReportService) runReport(ctx context.Context, report *opsScheduledReport, now time.Time) (int, error) {
|
||||||
if s == nil || s.opsService == nil || s.emailService == nil || report == nil {
|
if s == nil || s.opsService == nil || s.emailService == nil || report == nil {
|
||||||
return nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
if ctx == nil {
|
if ctx == nil {
|
||||||
ctx = context.Background()
|
ctx = context.Background()
|
||||||
@@ -310,11 +318,11 @@ func (s *OpsScheduledReportService) runReport(ctx context.Context, report *opsSc
|
|||||||
|
|
||||||
content, err := s.generateReportHTML(ctx, report, now)
|
content, err := s.generateReportHTML(ctx, report, now)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return 0, err
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(content) == "" {
|
if strings.TrimSpace(content) == "" {
|
||||||
// Skip sending when the report decides not to emit content (e.g., digest below min count).
|
// Skip sending when the report decides not to emit content (e.g., digest below min count).
|
||||||
return nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
recipients := report.Recipients
|
recipients := report.Recipients
|
||||||
@@ -325,22 +333,24 @@ func (s *OpsScheduledReportService) runReport(ctx context.Context, report *opsSc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if len(recipients) == 0 {
|
if len(recipients) == 0 {
|
||||||
return nil
|
return 0, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
subject := fmt.Sprintf("[Ops Report] %s", strings.TrimSpace(report.Name))
|
subject := fmt.Sprintf("[Ops Report] %s", strings.TrimSpace(report.Name))
|
||||||
|
|
||||||
|
attempts := 0
|
||||||
for _, to := range recipients {
|
for _, to := range recipients {
|
||||||
addr := strings.TrimSpace(to)
|
addr := strings.TrimSpace(to)
|
||||||
if addr == "" {
|
if addr == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
attempts++
|
||||||
if err := s.emailService.SendEmail(ctx, addr, subject, content); err != nil {
|
if err := s.emailService.SendEmail(ctx, addr, subject, content); err != nil {
|
||||||
// Ignore per-recipient failures; continue best-effort.
|
// Ignore per-recipient failures; continue best-effort.
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return nil
|
return attempts, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *OpsScheduledReportService) generateReportHTML(ctx context.Context, report *opsScheduledReport, now time.Time) (string, error) {
|
func (s *OpsScheduledReportService) generateReportHTML(ctx context.Context, report *opsScheduledReport, now time.Time) (string, error) {
|
||||||
@@ -650,7 +660,7 @@ func (s *OpsScheduledReportService) setLastRunAt(ctx context.Context, reportType
|
|||||||
_ = s.redisClient.Set(ctx, key, strconv.FormatInt(t.UTC().Unix(), 10), 14*24*time.Hour).Err()
|
_ = s.redisClient.Set(ctx, key, strconv.FormatInt(t.UTC().Unix(), 10), 14*24*time.Hour).Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *OpsScheduledReportService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration) {
|
func (s *OpsScheduledReportService) recordHeartbeatSuccess(runAt time.Time, duration time.Duration, result string) {
|
||||||
if s == nil || s.opsService == nil || s.opsService.opsRepo == nil {
|
if s == nil || s.opsService == nil || s.opsService.opsRepo == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -658,11 +668,17 @@ func (s *OpsScheduledReportService) recordHeartbeatSuccess(runAt time.Time, dura
|
|||||||
durMs := duration.Milliseconds()
|
durMs := duration.Milliseconds()
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
|
||||||
defer cancel()
|
defer cancel()
|
||||||
|
msg := strings.TrimSpace(result)
|
||||||
|
if msg == "" {
|
||||||
|
msg = "ok"
|
||||||
|
}
|
||||||
|
msg = truncateString(msg, 2048)
|
||||||
_ = s.opsService.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
_ = s.opsService.opsRepo.UpsertJobHeartbeat(ctx, &OpsUpsertJobHeartbeatInput{
|
||||||
JobName: opsScheduledReportJobName,
|
JobName: opsScheduledReportJobName,
|
||||||
LastRunAt: &runAt,
|
LastRunAt: &runAt,
|
||||||
LastSuccessAt: &now,
|
LastSuccessAt: &now,
|
||||||
LastDurationMs: &durMs,
|
LastDurationMs: &durMs,
|
||||||
|
LastResult: &msg,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,6 @@
|
|||||||
|
-- Add last_result to ops_job_heartbeats for UI job details.
|
||||||
|
|
||||||
|
ALTER TABLE IF EXISTS ops_job_heartbeats
|
||||||
|
ADD COLUMN IF NOT EXISTS last_result TEXT;
|
||||||
|
|
||||||
|
COMMENT ON COLUMN ops_job_heartbeats.last_result IS 'Last successful run result summary (human readable).';
|
||||||
@@ -293,6 +293,7 @@ export interface OpsJobHeartbeat {
|
|||||||
last_error_at?: string | null
|
last_error_at?: string | null
|
||||||
last_error?: string | null
|
last_error?: string | null
|
||||||
last_duration_ms?: number | null
|
last_duration_ms?: number | null
|
||||||
|
last_result?: string | null
|
||||||
updated_at: string
|
updated_at: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -414,7 +414,7 @@ const handleScroll = () => {
|
|||||||
menu.show = false
|
menu.show = false
|
||||||
}
|
}
|
||||||
|
|
||||||
onMounted(async () => { load(); try { const [p, g] = await Promise.all([adminAPI.proxies.getAll(), adminAPI.groups.getAll()]); proxies.value = p; groups.value = g } catch (error) { console.error('Failed to load proxies/groups:', error) }; window.addEventListener('scroll', handleScroll, true) })
|
onMounted(async () => { load(); try { const [p, g] = await Promise.all([adminAPI.proxies.getAll(), adminAPI.groups.getAll()]); proxies.value = p; groups.value = g } catch (error) { console.error('Failed to load proxies/groups:', error) } window.addEventListener('scroll', handleScroll, true) })
|
||||||
|
|
||||||
onUnmounted(() => {
|
onUnmounted(() => {
|
||||||
window.removeEventListener('scroll', handleScroll, true)
|
window.removeEventListener('scroll', handleScroll, true)
|
||||||
|
|||||||
@@ -42,7 +42,7 @@
|
|||||||
<!-- Row: Concurrency + Throughput -->
|
<!-- Row: Concurrency + Throughput -->
|
||||||
<div v-if="opsEnabled && !(loading && !hasLoadedOnce)" class="grid grid-cols-1 gap-6 lg:grid-cols-3">
|
<div v-if="opsEnabled && !(loading && !hasLoadedOnce)" class="grid grid-cols-1 gap-6 lg:grid-cols-3">
|
||||||
<div class="lg:col-span-1 min-h-[360px]">
|
<div class="lg:col-span-1 min-h-[360px]">
|
||||||
<OpsConcurrencyCard :platform-filter="platform" :group-id-filter="groupId" />
|
<OpsConcurrencyCard :platform-filter="platform" :group-id-filter="groupId" :refresh-token="dashboardRefreshToken" />
|
||||||
</div>
|
</div>
|
||||||
<div class="lg:col-span-2 min-h-[360px]">
|
<div class="lg:col-span-2 min-h-[360px]">
|
||||||
<OpsThroughputTrendChart
|
<OpsThroughputTrendChart
|
||||||
@@ -352,6 +352,9 @@ const autoRefreshEnabled = ref(false)
|
|||||||
const autoRefreshIntervalMs = ref(30000) // default 30 seconds
|
const autoRefreshIntervalMs = ref(30000) // default 30 seconds
|
||||||
const autoRefreshCountdown = ref(0)
|
const autoRefreshCountdown = ref(0)
|
||||||
|
|
||||||
|
// Used to trigger child component refreshes in a single shared cadence.
|
||||||
|
const dashboardRefreshToken = ref(0)
|
||||||
|
|
||||||
// Auto refresh timer
|
// Auto refresh timer
|
||||||
const { pause: pauseAutoRefresh, resume: resumeAutoRefresh } = useIntervalFn(
|
const { pause: pauseAutoRefresh, resume: resumeAutoRefresh } = useIntervalFn(
|
||||||
() => {
|
() => {
|
||||||
@@ -597,7 +600,12 @@ async function fetchData() {
|
|||||||
refreshErrorDistributionWithCancel(fetchSeq, dashboardFetchController.signal)
|
refreshErrorDistributionWithCancel(fetchSeq, dashboardFetchController.signal)
|
||||||
])
|
])
|
||||||
if (fetchSeq !== dashboardFetchSeq) return
|
if (fetchSeq !== dashboardFetchSeq) return
|
||||||
|
|
||||||
lastUpdated.value = new Date()
|
lastUpdated.value = new Date()
|
||||||
|
|
||||||
|
// Trigger child component refreshes using the same cadence as the header.
|
||||||
|
dashboardRefreshToken.value += 1
|
||||||
|
|
||||||
// Reset auto refresh countdown after successful fetch
|
// Reset auto refresh countdown after successful fetch
|
||||||
if (autoRefreshEnabled.value) {
|
if (autoRefreshEnabled.value) {
|
||||||
autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
|
autoRefreshCountdown.value = Math.floor(autoRefreshIntervalMs.value / 1000)
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
|
import { computed, ref, watch } from 'vue'
|
||||||
import { useI18n } from 'vue-i18n'
|
import { useI18n } from 'vue-i18n'
|
||||||
import { useIntervalFn } from '@vueuse/core'
|
|
||||||
import { opsAPI, type OpsAccountAvailabilityStatsResponse, type OpsConcurrencyStatsResponse } from '@/api/admin/ops'
|
import { opsAPI, type OpsAccountAvailabilityStatsResponse, type OpsConcurrencyStatsResponse } from '@/api/admin/ops'
|
||||||
|
|
||||||
interface Props {
|
interface Props {
|
||||||
platformFilter?: string
|
platformFilter?: string
|
||||||
groupIdFilter?: number | null
|
groupIdFilter?: number | null
|
||||||
|
refreshToken: number
|
||||||
}
|
}
|
||||||
|
|
||||||
const props = withDefaults(defineProps<Props>(), {
|
const props = withDefaults(defineProps<Props>(), {
|
||||||
@@ -233,15 +233,13 @@ async function loadData() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 定期刷新(5秒)
|
// 刷新节奏由父组件统一控制(OpsDashboard Header 的刷新状态/倒计时)
|
||||||
const { pause: pauseRefresh, resume: resumeRefresh } = useIntervalFn(
|
watch(
|
||||||
|
() => props.refreshToken,
|
||||||
() => {
|
() => {
|
||||||
if (realtimeEnabled.value) {
|
if (!realtimeEnabled.value) return
|
||||||
loadData()
|
loadData()
|
||||||
}
|
}
|
||||||
},
|
|
||||||
5000,
|
|
||||||
{ immediate: false }
|
|
||||||
)
|
)
|
||||||
|
|
||||||
function getLoadBarClass(loadPct: number): string {
|
function getLoadBarClass(loadPct: number): string {
|
||||||
@@ -271,23 +269,15 @@ function formatDuration(seconds: number): string {
|
|||||||
return `${hours}h`
|
return `${hours}h`
|
||||||
}
|
}
|
||||||
|
|
||||||
onMounted(() => {
|
watch(
|
||||||
loadData()
|
() => realtimeEnabled.value,
|
||||||
resumeRefresh()
|
async (enabled) => {
|
||||||
})
|
if (enabled) {
|
||||||
|
await loadData()
|
||||||
onUnmounted(() => {
|
}
|
||||||
pauseRefresh()
|
},
|
||||||
})
|
{ immediate: true }
|
||||||
|
)
|
||||||
watch(realtimeEnabled, async (enabled) => {
|
|
||||||
if (!enabled) {
|
|
||||||
pauseRefresh()
|
|
||||||
} else {
|
|
||||||
resumeRefresh()
|
|
||||||
await loadData()
|
|
||||||
}
|
|
||||||
})
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<template>
|
<template>
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
<script setup lang="ts">
|
<script setup lang="ts">
|
||||||
import { computed, onMounted, onUnmounted, ref, watch } from 'vue'
|
import { computed, onMounted, ref, watch } from 'vue'
|
||||||
import { useIntervalFn } from '@vueuse/core'
|
|
||||||
import { useI18n } from 'vue-i18n'
|
import { useI18n } from 'vue-i18n'
|
||||||
import Select from '@/components/common/Select.vue'
|
import Select from '@/components/common/Select.vue'
|
||||||
import HelpTooltip from '@/components/common/HelpTooltip.vue'
|
import HelpTooltip from '@/components/common/HelpTooltip.vue'
|
||||||
@@ -315,31 +314,33 @@ watch(
|
|||||||
{ immediate: true }
|
{ immediate: true }
|
||||||
)
|
)
|
||||||
|
|
||||||
const { pause: pauseRealtimeTrafficRefresh, resume: resumeRealtimeTrafficRefresh } = useIntervalFn(
|
|
||||||
() => {
|
|
||||||
loadRealtimeTrafficSummary()
|
|
||||||
},
|
|
||||||
5000,
|
|
||||||
{ immediate: false }
|
|
||||||
)
|
|
||||||
|
|
||||||
watch(
|
watch(
|
||||||
() => adminSettingsStore.opsRealtimeMonitoringEnabled,
|
() => adminSettingsStore.opsRealtimeMonitoringEnabled,
|
||||||
(enabled) => {
|
(enabled) => {
|
||||||
if (enabled) {
|
if (!enabled) {
|
||||||
resumeRealtimeTrafficRefresh()
|
|
||||||
} else {
|
|
||||||
pauseRealtimeTrafficRefresh()
|
|
||||||
// Keep UI stable when realtime monitoring is turned off.
|
// Keep UI stable when realtime monitoring is turned off.
|
||||||
realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary()
|
realtimeTrafficSummary.value = makeZeroRealtimeTrafficSummary()
|
||||||
|
} else {
|
||||||
|
loadRealtimeTrafficSummary()
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{ immediate: true }
|
{ immediate: true }
|
||||||
)
|
)
|
||||||
|
|
||||||
onUnmounted(() => {
|
// Realtime traffic refresh follows the parent (OpsDashboard) refresh cadence.
|
||||||
pauseRealtimeTrafficRefresh()
|
watch(
|
||||||
})
|
() => [props.autoRefreshEnabled, props.autoRefreshCountdown, props.loading] as const,
|
||||||
|
([enabled, countdown, loading]) => {
|
||||||
|
if (!enabled) return
|
||||||
|
if (loading) return
|
||||||
|
// Treat countdown reset (or reaching 0) as a refresh boundary.
|
||||||
|
if (countdown === 0) {
|
||||||
|
loadRealtimeTrafficSummary()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
// no-op: parent controls refresh cadence
|
||||||
|
|
||||||
const displayRealTimeQps = computed(() => {
|
const displayRealTimeQps = computed(() => {
|
||||||
const v = realtimeTrafficSummary.value?.qps?.current
|
const v = realtimeTrafficSummary.value?.qps?.current
|
||||||
@@ -1442,7 +1443,7 @@ function handleToolbarRefresh() {
|
|||||||
<!-- MEM -->
|
<!-- MEM -->
|
||||||
<div class="rounded-xl bg-gray-50 p-3 dark:bg-dark-900">
|
<div class="rounded-xl bg-gray-50 p-3 dark:bg-dark-900">
|
||||||
<div class="flex items-center gap-1">
|
<div class="flex items-center gap-1">
|
||||||
<div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">{{ t('admin.ops.mem') }}</div>
|
<div class="text-[10px] font-bold uppercase tracking-wider text-gray-400">{{ t('admin.ops.memory') }}</div>
|
||||||
<HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.memory')" />
|
<HelpTooltip v-if="!props.fullscreen" :content="t('admin.ops.tooltips.memory')" />
|
||||||
</div>
|
</div>
|
||||||
<div class="mt-1 text-lg font-black" :class="memPercentClass">
|
<div class="mt-1 text-lg font-black" :class="memPercentClass">
|
||||||
@@ -1545,7 +1546,10 @@ function handleToolbarRefresh() {
|
|||||||
>
|
>
|
||||||
<div class="flex items-center justify-between gap-3">
|
<div class="flex items-center justify-between gap-3">
|
||||||
<div class="truncate text-sm font-semibold text-gray-900 dark:text-white">{{ hb.job_name }}</div>
|
<div class="truncate text-sm font-semibold text-gray-900 dark:text-white">{{ hb.job_name }}</div>
|
||||||
<div class="text-xs text-gray-500 dark:text-gray-400">{{ formatTimeShort(hb.updated_at) }}</div>
|
<div class="flex items-center gap-3 text-xs text-gray-500 dark:text-gray-400">
|
||||||
|
<span v-if="hb.last_duration_ms != null" class="font-mono">{{ hb.last_duration_ms }}ms</span>
|
||||||
|
<span>{{ formatTimeShort(hb.updated_at) }}</span>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="mt-2 grid grid-cols-1 gap-2 text-xs text-gray-600 dark:text-gray-300 sm:grid-cols-2">
|
<div class="mt-2 grid grid-cols-1 gap-2 text-xs text-gray-600 dark:text-gray-300 sm:grid-cols-2">
|
||||||
@@ -1555,6 +1559,9 @@ function handleToolbarRefresh() {
|
|||||||
<div>
|
<div>
|
||||||
{{ t('admin.ops.lastError') }} <span class="font-mono">{{ formatTimeShort(hb.last_error_at) }}</span>
|
{{ t('admin.ops.lastError') }} <span class="font-mono">{{ formatTimeShort(hb.last_error_at) }}</span>
|
||||||
</div>
|
</div>
|
||||||
|
<div>
|
||||||
|
{{ t('admin.ops.result') }} <span class="font-mono">{{ hb.last_result || '-' }}</span>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div
|
<div
|
||||||
|
|||||||
Reference in New Issue
Block a user