代码审查反馈: 1. 文件行数超标:ops_cleanup_service.go 594→413 行。 拆 opsCleanupPlan / deleteOldRowsByID / truncateOpsTable / isMissingRelationError + counts struct 到 ops_cleanup_executor.go (164 行)。 2. runCleanupOnce 89 行→30 行(table-driven): 用 []opsCleanupTarget 循环替代三组重复的 opsCleanupPlan → runOne → assign。 3. 魔法值提取常量: opsCleanupDefaultSchedule / opsCleanupBatchSize / opsCleanupCronStopTimeout / opsCleanupRunTimeout / opsCleanupHeartbeatTimeout。 ops_settings.go 中 "0 2 * * *" 也统一引用 opsCleanupDefaultSchedule。 4. 补 5 个缺失测试: - Reload 未 Start 时 no-op - Reload 已 Stop 后 no-op - cleanupReloader==nil 时 Update 不 panic - Start 重复调用幂等 - refreshEffectiveBeforeRun 正确更新 snapshot
165 lines
3.6 KiB
Go
165 lines
3.6 KiB
Go
package service
|
||
|
||
import (
|
||
"context"
|
||
"database/sql"
|
||
"fmt"
|
||
"strings"
|
||
"time"
|
||
)
|
||
|
||
const (
|
||
opsCleanupDefaultSchedule = "0 2 * * *"
|
||
opsCleanupBatchSize = 5000
|
||
opsCleanupCronStopTimeout = 3 * time.Second
|
||
opsCleanupRunTimeout = 30 * time.Minute
|
||
opsCleanupHeartbeatTimeout = 2 * time.Second
|
||
)
|
||
|
||
type opsCleanupTarget struct {
|
||
retentionDays int
|
||
table string
|
||
timeCol string
|
||
castDate bool
|
||
counter *int64
|
||
}
|
||
|
||
type opsCleanupDeletedCounts struct {
|
||
errorLogs int64
|
||
retryAttempts int64
|
||
alertEvents int64
|
||
systemLogs int64
|
||
logAudits int64
|
||
systemMetrics int64
|
||
hourlyPreagg int64
|
||
dailyPreagg int64
|
||
}
|
||
|
||
func (c opsCleanupDeletedCounts) String() string {
|
||
return fmt.Sprintf(
|
||
"error_logs=%d retry_attempts=%d alert_events=%d system_logs=%d log_audits=%d system_metrics=%d hourly_preagg=%d daily_preagg=%d",
|
||
c.errorLogs,
|
||
c.retryAttempts,
|
||
c.alertEvents,
|
||
c.systemLogs,
|
||
c.logAudits,
|
||
c.systemMetrics,
|
||
c.hourlyPreagg,
|
||
c.dailyPreagg,
|
||
)
|
||
}
|
||
|
||
// opsCleanupPlan 把"保留天数"翻译成具体的清理动作。
|
||
// - days < 0 → 跳过该项清理(ok=false),保留兼容老数据
|
||
// - days == 0 → TRUNCATE TABLE(O(1) 全清),truncate=true
|
||
// - days > 0 → 批量 DELETE 早于 now-N天 的行,cutoff = now - N 天
|
||
func opsCleanupPlan(now time.Time, days int) (cutoff time.Time, truncate, ok bool) {
|
||
if days < 0 {
|
||
return time.Time{}, false, false
|
||
}
|
||
if days == 0 {
|
||
return time.Time{}, true, true
|
||
}
|
||
return now.AddDate(0, 0, -days), false, true
|
||
}
|
||
|
||
func opsCleanupRunOne(
|
||
ctx context.Context,
|
||
db *sql.DB,
|
||
truncate bool,
|
||
cutoff time.Time,
|
||
table, timeCol string,
|
||
castDate bool,
|
||
batchSize int,
|
||
) (int64, error) {
|
||
if truncate {
|
||
return truncateOpsTable(ctx, db, table)
|
||
}
|
||
return deleteOldRowsByID(ctx, db, table, timeCol, cutoff, batchSize, castDate)
|
||
}
|
||
|
||
func deleteOldRowsByID(
|
||
ctx context.Context,
|
||
db *sql.DB,
|
||
table string,
|
||
timeColumn string,
|
||
cutoff time.Time,
|
||
batchSize int,
|
||
castCutoffToDate bool,
|
||
) (int64, error) {
|
||
if db == nil {
|
||
return 0, nil
|
||
}
|
||
if batchSize <= 0 {
|
||
batchSize = opsCleanupBatchSize
|
||
}
|
||
|
||
where := fmt.Sprintf("%s < $1", timeColumn)
|
||
if castCutoffToDate {
|
||
where = fmt.Sprintf("%s < $1::date", timeColumn)
|
||
}
|
||
|
||
q := fmt.Sprintf(`
|
||
WITH batch AS (
|
||
SELECT id FROM %s
|
||
WHERE %s
|
||
ORDER BY id
|
||
LIMIT $2
|
||
)
|
||
DELETE FROM %s
|
||
WHERE id IN (SELECT id FROM batch)
|
||
`, table, where, table)
|
||
|
||
var total int64
|
||
for {
|
||
res, err := db.ExecContext(ctx, q, cutoff, batchSize)
|
||
if err != nil {
|
||
if isMissingRelationError(err) {
|
||
return total, nil
|
||
}
|
||
return total, err
|
||
}
|
||
affected, err := res.RowsAffected()
|
||
if err != nil {
|
||
return total, err
|
||
}
|
||
total += affected
|
||
if affected == 0 {
|
||
break
|
||
}
|
||
}
|
||
return total, nil
|
||
}
|
||
|
||
// truncateOpsTable 用 TRUNCATE TABLE 清空指定表,先 SELECT COUNT(*) 取得清空前行数用于 heartbeat。
|
||
func truncateOpsTable(ctx context.Context, db *sql.DB, table string) (int64, error) {
|
||
if db == nil {
|
||
return 0, nil
|
||
}
|
||
var count int64
|
||
if err := db.QueryRowContext(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s", table)).Scan(&count); err != nil {
|
||
if isMissingRelationError(err) {
|
||
return 0, nil
|
||
}
|
||
return 0, fmt.Errorf("count %s: %w", table, err)
|
||
}
|
||
if count == 0 {
|
||
return 0, nil
|
||
}
|
||
if _, err := db.ExecContext(ctx, fmt.Sprintf("TRUNCATE TABLE %s", table)); err != nil {
|
||
if isMissingRelationError(err) {
|
||
return 0, nil
|
||
}
|
||
return 0, fmt.Errorf("truncate %s: %w", table, err)
|
||
}
|
||
return count, nil
|
||
}
|
||
|
||
func isMissingRelationError(err error) bool {
|
||
if err == nil {
|
||
return false
|
||
}
|
||
s := strings.ToLower(err.Error())
|
||
return strings.Contains(s, "does not exist") && strings.Contains(s, "relation")
|
||
}
|