feat(channel-monitor): aggregate history to daily rollups + soft delete
明细只保留 1 天,超过 1 天聚合到新表 channel_monitor_daily_rollups(按 monitor_id/model/bucket_date 维度),聚合保留 30 天。两张表都用 SoftDeleteMixin 软删除(DELETE 自动改为 UPDATE deleted_at = NOW())。 聚合 + 清理任务由 OpsCleanupService 的 cron 统一调度,与运维监控的清理共享 schedule(默认 0 2 * * *)和 leader lock。ChannelMonitorRunner 的 cleanupLoop 被移除,只保留 dueCheckLoop。 读取路径 ComputeAvailability* 改为 UNION 明细(今天 deleted_at IS NULL)+ 聚合(过去 windowDays 天 deleted_at IS NULL),SUM(ok)/SUM(total) 自然加权计算可用率,AVG latency 用 SUM(sum_latency_ms)/SUM(count_latency)。 watermark 表 channel_monitor_aggregation_watermark 单行(id=1),记录 last_aggregated_date,重启后从该日期 +1 继续聚合,首次为 nil 则从 today - 30d 开始回填,单次最多回填 35 天,避免长事务。 raw SQL 的 ListLatestPerModel / ListLatestForMonitorIDs / ListRecentHistoryForMonitors 都补上 deleted_at IS NULL 过滤(SoftDeleteMixin interceptor 只对 ent query 生效)。 bump version to 0.1.114.28。 GroupBadge 在 MonitorKeyPickerDialog 中复用平台主题色 + 倍率/专属倍率(顺手优化)。
This commit is contained in:
@@ -36,11 +36,15 @@ return 0
|
||||
// - Scheduling: 5-field cron spec (minute hour dom month dow).
|
||||
// - Multi-instance: best-effort Redis leader lock so only one node runs cleanup.
|
||||
// - Safety: deletes in batches to avoid long transactions.
|
||||
//
|
||||
// 附带:在 runCleanupOnce 末尾调用 ChannelMonitorService.RunDailyMaintenance,
|
||||
// 统一共享 cron schedule + leader lock + heartbeat,避免再引一套调度。
|
||||
type OpsCleanupService struct {
|
||||
opsRepo OpsRepository
|
||||
db *sql.DB
|
||||
redisClient *redis.Client
|
||||
cfg *config.Config
|
||||
opsRepo OpsRepository
|
||||
db *sql.DB
|
||||
redisClient *redis.Client
|
||||
cfg *config.Config
|
||||
channelMonitorSvc *ChannelMonitorService
|
||||
|
||||
instanceID string
|
||||
|
||||
@@ -57,13 +61,15 @@ func NewOpsCleanupService(
|
||||
db *sql.DB,
|
||||
redisClient *redis.Client,
|
||||
cfg *config.Config,
|
||||
channelMonitorSvc *ChannelMonitorService,
|
||||
) *OpsCleanupService {
|
||||
return &OpsCleanupService{
|
||||
opsRepo: opsRepo,
|
||||
db: db,
|
||||
redisClient: redisClient,
|
||||
cfg: cfg,
|
||||
instanceID: uuid.NewString(),
|
||||
opsRepo: opsRepo,
|
||||
db: db,
|
||||
redisClient: redisClient,
|
||||
cfg: cfg,
|
||||
channelMonitorSvc: channelMonitorSvc,
|
||||
instanceID: uuid.NewString(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -248,6 +254,15 @@ func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDelet
|
||||
out.dailyPreagg = n
|
||||
}
|
||||
|
||||
// Channel monitor 每日维护(聚合昨日明细 + 软删过期明细/聚合)。
|
||||
// 失败只记日志,不影响 ops 清理的成功状态(与 ops 各步骤风格一致);
|
||||
// 维护本身已经把每步错误打到 slog,heartbeat result 不再分项记录。
|
||||
if s.channelMonitorSvc != nil {
|
||||
if err := s.channelMonitorSvc.RunDailyMaintenance(ctx); err != nil {
|
||||
logger.LegacyPrintf("service.ops_cleanup", "[OpsCleanup] channel monitor maintenance failed: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user