- 新增 ops 主服务(ops_service.go)和端口定义(ops_port.go)
- 实现账号可用性检查服务(ops_account_availability.go)
- 实现数据聚合服务(ops_aggregation_service.go)
- 实现告警评估服务(ops_alert_evaluator_service.go)
- 实现告警管理服务(ops_alerts.go)
- 实现数据清理服务(ops_cleanup_service.go)
- 实现并发控制服务(ops_concurrency.go)
- 实现仪表板服务(ops_dashboard.go)
- 实现错误处理服务(ops_errors.go)
- 实现直方图服务(ops_histograms.go)
- 实现指标采集服务(ops_metrics_collector.go)
- 实现查询模式服务(ops_query_mode.go)
- 实现实时监控服务(ops_realtime.go)
- 实现请求详情服务(ops_request_details.go)
- 实现重试机制服务(ops_retry.go)
- 实现配置管理服务(ops_settings.go)
- 实现趋势分析服务(ops_trends.go)
- 实现窗口统计服务(ops_window_stats.go)
- 添加 ops 相关领域常量
- 注册 service 依赖注入
355 lines
11 KiB
Go
355 lines
11 KiB
Go
package service
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// Fallback values for the alert evaluator leader lock. They populate the
// default alert runtime settings and are applied when a stored lock key is
// blank or a stored TTL is not positive.
const (
	// opsAlertEvaluatorLeaderLockKeyDefault is the default lock key.
	opsAlertEvaluatorLeaderLockKeyDefault = "ops:alert:evaluator:leader"

	// opsAlertEvaluatorLeaderLockTTLDefault is the default lock TTL.
	opsAlertEvaluatorLeaderLockTTLDefault = time.Second * 30
)
|
|
|
|
// =========================
|
|
// Email notification config
|
|
// =========================
|
|
|
|
func (s *OpsService) GetEmailNotificationConfig(ctx context.Context) (*OpsEmailNotificationConfig, error) {
|
|
defaultCfg := defaultOpsEmailNotificationConfig()
|
|
if s == nil || s.settingRepo == nil {
|
|
return defaultCfg, nil
|
|
}
|
|
if ctx == nil {
|
|
ctx = context.Background()
|
|
}
|
|
|
|
raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsEmailNotificationConfig)
|
|
if err != nil {
|
|
if errors.Is(err, ErrSettingNotFound) {
|
|
// Initialize defaults on first read (best-effort).
|
|
if b, mErr := json.Marshal(defaultCfg); mErr == nil {
|
|
_ = s.settingRepo.Set(ctx, SettingKeyOpsEmailNotificationConfig, string(b))
|
|
}
|
|
return defaultCfg, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
cfg := &OpsEmailNotificationConfig{}
|
|
if err := json.Unmarshal([]byte(raw), cfg); err != nil {
|
|
// Corrupted JSON should not break ops UI; fall back to defaults.
|
|
return defaultCfg, nil
|
|
}
|
|
normalizeOpsEmailNotificationConfig(cfg)
|
|
return cfg, nil
|
|
}
|
|
|
|
func (s *OpsService) UpdateEmailNotificationConfig(ctx context.Context, req *OpsEmailNotificationConfigUpdateRequest) (*OpsEmailNotificationConfig, error) {
|
|
if s == nil || s.settingRepo == nil {
|
|
return nil, errors.New("setting repository not initialized")
|
|
}
|
|
if ctx == nil {
|
|
ctx = context.Background()
|
|
}
|
|
if req == nil {
|
|
return nil, errors.New("invalid request")
|
|
}
|
|
|
|
cfg, err := s.GetEmailNotificationConfig(ctx)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if req.Alert != nil {
|
|
cfg.Alert.Enabled = req.Alert.Enabled
|
|
if req.Alert.Recipients != nil {
|
|
cfg.Alert.Recipients = req.Alert.Recipients
|
|
}
|
|
cfg.Alert.MinSeverity = strings.TrimSpace(req.Alert.MinSeverity)
|
|
cfg.Alert.RateLimitPerHour = req.Alert.RateLimitPerHour
|
|
cfg.Alert.BatchingWindowSeconds = req.Alert.BatchingWindowSeconds
|
|
cfg.Alert.IncludeResolvedAlerts = req.Alert.IncludeResolvedAlerts
|
|
}
|
|
|
|
if req.Report != nil {
|
|
cfg.Report.Enabled = req.Report.Enabled
|
|
if req.Report.Recipients != nil {
|
|
cfg.Report.Recipients = req.Report.Recipients
|
|
}
|
|
cfg.Report.DailySummaryEnabled = req.Report.DailySummaryEnabled
|
|
cfg.Report.DailySummarySchedule = strings.TrimSpace(req.Report.DailySummarySchedule)
|
|
cfg.Report.WeeklySummaryEnabled = req.Report.WeeklySummaryEnabled
|
|
cfg.Report.WeeklySummarySchedule = strings.TrimSpace(req.Report.WeeklySummarySchedule)
|
|
cfg.Report.ErrorDigestEnabled = req.Report.ErrorDigestEnabled
|
|
cfg.Report.ErrorDigestSchedule = strings.TrimSpace(req.Report.ErrorDigestSchedule)
|
|
cfg.Report.ErrorDigestMinCount = req.Report.ErrorDigestMinCount
|
|
cfg.Report.AccountHealthEnabled = req.Report.AccountHealthEnabled
|
|
cfg.Report.AccountHealthSchedule = strings.TrimSpace(req.Report.AccountHealthSchedule)
|
|
cfg.Report.AccountHealthErrorRateThreshold = req.Report.AccountHealthErrorRateThreshold
|
|
}
|
|
|
|
if err := validateOpsEmailNotificationConfig(cfg); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
normalizeOpsEmailNotificationConfig(cfg)
|
|
raw, err := json.Marshal(cfg)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := s.settingRepo.Set(ctx, SettingKeyOpsEmailNotificationConfig, string(raw)); err != nil {
|
|
return nil, err
|
|
}
|
|
return cfg, nil
|
|
}
|
|
|
|
func defaultOpsEmailNotificationConfig() *OpsEmailNotificationConfig {
|
|
return &OpsEmailNotificationConfig{
|
|
Alert: OpsEmailAlertConfig{
|
|
Enabled: true,
|
|
Recipients: []string{},
|
|
MinSeverity: "",
|
|
RateLimitPerHour: 0,
|
|
BatchingWindowSeconds: 0,
|
|
IncludeResolvedAlerts: false,
|
|
},
|
|
Report: OpsEmailReportConfig{
|
|
Enabled: false,
|
|
Recipients: []string{},
|
|
DailySummaryEnabled: false,
|
|
DailySummarySchedule: "0 9 * * *",
|
|
WeeklySummaryEnabled: false,
|
|
WeeklySummarySchedule: "0 9 * * 1",
|
|
ErrorDigestEnabled: false,
|
|
ErrorDigestSchedule: "0 9 * * *",
|
|
ErrorDigestMinCount: 10,
|
|
AccountHealthEnabled: false,
|
|
AccountHealthSchedule: "0 9 * * *",
|
|
AccountHealthErrorRateThreshold: 10.0,
|
|
},
|
|
}
|
|
}
|
|
|
|
func normalizeOpsEmailNotificationConfig(cfg *OpsEmailNotificationConfig) {
|
|
if cfg == nil {
|
|
return
|
|
}
|
|
if cfg.Alert.Recipients == nil {
|
|
cfg.Alert.Recipients = []string{}
|
|
}
|
|
if cfg.Report.Recipients == nil {
|
|
cfg.Report.Recipients = []string{}
|
|
}
|
|
|
|
cfg.Alert.MinSeverity = strings.TrimSpace(cfg.Alert.MinSeverity)
|
|
cfg.Report.DailySummarySchedule = strings.TrimSpace(cfg.Report.DailySummarySchedule)
|
|
cfg.Report.WeeklySummarySchedule = strings.TrimSpace(cfg.Report.WeeklySummarySchedule)
|
|
cfg.Report.ErrorDigestSchedule = strings.TrimSpace(cfg.Report.ErrorDigestSchedule)
|
|
cfg.Report.AccountHealthSchedule = strings.TrimSpace(cfg.Report.AccountHealthSchedule)
|
|
|
|
// Fill missing schedules with defaults to avoid breaking cron logic if clients send empty strings.
|
|
if cfg.Report.DailySummarySchedule == "" {
|
|
cfg.Report.DailySummarySchedule = "0 9 * * *"
|
|
}
|
|
if cfg.Report.WeeklySummarySchedule == "" {
|
|
cfg.Report.WeeklySummarySchedule = "0 9 * * 1"
|
|
}
|
|
if cfg.Report.ErrorDigestSchedule == "" {
|
|
cfg.Report.ErrorDigestSchedule = "0 9 * * *"
|
|
}
|
|
if cfg.Report.AccountHealthSchedule == "" {
|
|
cfg.Report.AccountHealthSchedule = "0 9 * * *"
|
|
}
|
|
}
|
|
|
|
func validateOpsEmailNotificationConfig(cfg *OpsEmailNotificationConfig) error {
|
|
if cfg == nil {
|
|
return errors.New("invalid config")
|
|
}
|
|
|
|
if cfg.Alert.RateLimitPerHour < 0 {
|
|
return errors.New("alert.rate_limit_per_hour must be >= 0")
|
|
}
|
|
if cfg.Alert.BatchingWindowSeconds < 0 {
|
|
return errors.New("alert.batching_window_seconds must be >= 0")
|
|
}
|
|
switch strings.TrimSpace(cfg.Alert.MinSeverity) {
|
|
case "", "critical", "warning", "info":
|
|
default:
|
|
return errors.New("alert.min_severity must be one of: critical, warning, info, or empty")
|
|
}
|
|
|
|
if cfg.Report.ErrorDigestMinCount < 0 {
|
|
return errors.New("report.error_digest_min_count must be >= 0")
|
|
}
|
|
if cfg.Report.AccountHealthErrorRateThreshold < 0 || cfg.Report.AccountHealthErrorRateThreshold > 100 {
|
|
return errors.New("report.account_health_error_rate_threshold must be between 0 and 100")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// =========================
|
|
// Alert runtime settings
|
|
// =========================
|
|
|
|
func defaultOpsAlertRuntimeSettings() *OpsAlertRuntimeSettings {
|
|
return &OpsAlertRuntimeSettings{
|
|
EvaluationIntervalSeconds: 60,
|
|
DistributedLock: OpsDistributedLockSettings{
|
|
Enabled: true,
|
|
Key: opsAlertEvaluatorLeaderLockKeyDefault,
|
|
TTLSeconds: int(opsAlertEvaluatorLeaderLockTTLDefault.Seconds()),
|
|
},
|
|
Silencing: OpsAlertSilencingSettings{
|
|
Enabled: false,
|
|
GlobalUntilRFC3339: "",
|
|
GlobalReason: "",
|
|
Entries: []OpsAlertSilenceEntry{},
|
|
},
|
|
}
|
|
}
|
|
|
|
func normalizeOpsDistributedLockSettings(s *OpsDistributedLockSettings, defaultKey string, defaultTTLSeconds int) {
|
|
if s == nil {
|
|
return
|
|
}
|
|
s.Key = strings.TrimSpace(s.Key)
|
|
if s.Key == "" {
|
|
s.Key = defaultKey
|
|
}
|
|
if s.TTLSeconds <= 0 {
|
|
s.TTLSeconds = defaultTTLSeconds
|
|
}
|
|
}
|
|
|
|
func normalizeOpsAlertSilencingSettings(s *OpsAlertSilencingSettings) {
|
|
if s == nil {
|
|
return
|
|
}
|
|
s.GlobalUntilRFC3339 = strings.TrimSpace(s.GlobalUntilRFC3339)
|
|
s.GlobalReason = strings.TrimSpace(s.GlobalReason)
|
|
if s.Entries == nil {
|
|
s.Entries = []OpsAlertSilenceEntry{}
|
|
}
|
|
for i := range s.Entries {
|
|
s.Entries[i].UntilRFC3339 = strings.TrimSpace(s.Entries[i].UntilRFC3339)
|
|
s.Entries[i].Reason = strings.TrimSpace(s.Entries[i].Reason)
|
|
}
|
|
}
|
|
|
|
func validateOpsDistributedLockSettings(s OpsDistributedLockSettings) error {
|
|
if strings.TrimSpace(s.Key) == "" {
|
|
return errors.New("distributed_lock.key is required")
|
|
}
|
|
if s.TTLSeconds <= 0 || s.TTLSeconds > int((24*time.Hour).Seconds()) {
|
|
return errors.New("distributed_lock.ttl_seconds must be between 1 and 86400")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func validateOpsAlertSilencingSettings(s OpsAlertSilencingSettings) error {
|
|
parse := func(raw string) error {
|
|
if strings.TrimSpace(raw) == "" {
|
|
return nil
|
|
}
|
|
if _, err := time.Parse(time.RFC3339, raw); err != nil {
|
|
return errors.New("silencing time must be RFC3339")
|
|
}
|
|
return nil
|
|
}
|
|
|
|
if err := parse(s.GlobalUntilRFC3339); err != nil {
|
|
return err
|
|
}
|
|
for _, entry := range s.Entries {
|
|
if strings.TrimSpace(entry.UntilRFC3339) == "" {
|
|
return errors.New("silencing.entries.until_rfc3339 is required")
|
|
}
|
|
if _, err := time.Parse(time.RFC3339, entry.UntilRFC3339); err != nil {
|
|
return errors.New("silencing.entries.until_rfc3339 must be RFC3339")
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *OpsService) GetOpsAlertRuntimeSettings(ctx context.Context) (*OpsAlertRuntimeSettings, error) {
|
|
defaultCfg := defaultOpsAlertRuntimeSettings()
|
|
if s == nil || s.settingRepo == nil {
|
|
return defaultCfg, nil
|
|
}
|
|
if ctx == nil {
|
|
ctx = context.Background()
|
|
}
|
|
|
|
raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsAlertRuntimeSettings)
|
|
if err != nil {
|
|
if errors.Is(err, ErrSettingNotFound) {
|
|
if b, mErr := json.Marshal(defaultCfg); mErr == nil {
|
|
_ = s.settingRepo.Set(ctx, SettingKeyOpsAlertRuntimeSettings, string(b))
|
|
}
|
|
return defaultCfg, nil
|
|
}
|
|
return nil, err
|
|
}
|
|
|
|
cfg := &OpsAlertRuntimeSettings{}
|
|
if err := json.Unmarshal([]byte(raw), cfg); err != nil {
|
|
return defaultCfg, nil
|
|
}
|
|
|
|
if cfg.EvaluationIntervalSeconds <= 0 {
|
|
cfg.EvaluationIntervalSeconds = defaultCfg.EvaluationIntervalSeconds
|
|
}
|
|
normalizeOpsDistributedLockSettings(&cfg.DistributedLock, opsAlertEvaluatorLeaderLockKeyDefault, defaultCfg.DistributedLock.TTLSeconds)
|
|
normalizeOpsAlertSilencingSettings(&cfg.Silencing)
|
|
|
|
return cfg, nil
|
|
}
|
|
|
|
func (s *OpsService) UpdateOpsAlertRuntimeSettings(ctx context.Context, cfg *OpsAlertRuntimeSettings) (*OpsAlertRuntimeSettings, error) {
|
|
if s == nil || s.settingRepo == nil {
|
|
return nil, errors.New("setting repository not initialized")
|
|
}
|
|
if ctx == nil {
|
|
ctx = context.Background()
|
|
}
|
|
if cfg == nil {
|
|
return nil, errors.New("invalid config")
|
|
}
|
|
|
|
if cfg.EvaluationIntervalSeconds < 1 || cfg.EvaluationIntervalSeconds > int((24*time.Hour).Seconds()) {
|
|
return nil, errors.New("evaluation_interval_seconds must be between 1 and 86400")
|
|
}
|
|
if cfg.DistributedLock.Enabled {
|
|
if err := validateOpsDistributedLockSettings(cfg.DistributedLock); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
if cfg.Silencing.Enabled {
|
|
if err := validateOpsAlertSilencingSettings(cfg.Silencing); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
defaultCfg := defaultOpsAlertRuntimeSettings()
|
|
normalizeOpsDistributedLockSettings(&cfg.DistributedLock, opsAlertEvaluatorLeaderLockKeyDefault, defaultCfg.DistributedLock.TTLSeconds)
|
|
normalizeOpsAlertSilencingSettings(&cfg.Silencing)
|
|
|
|
raw, err := json.Marshal(cfg)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := s.settingRepo.Set(ctx, SettingKeyOpsAlertRuntimeSettings, string(raw)); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Return a fresh copy (avoid callers holding pointers into internal slices that may be mutated).
|
|
updated := &OpsAlertRuntimeSettings{}
|
|
_ = json.Unmarshal(raw, updated)
|
|
return updated, nil
|
|
}
|
|
|