From 7536dbfee5478f0f7922d904e826883af284e019 Mon Sep 17 00:00:00 2001
From: IanShaw027 <131567472+IanShaw027@users.noreply.github.com>
Date: Mon, 12 Jan 2026 11:42:56 +0800
Subject: [PATCH] =?UTF-8?q?feat(ops):=20=E5=90=8E=E7=AB=AF=E6=B7=BB?=
 =?UTF-8?q?=E5=8A=A0=E6=8C=87=E6=A0=87=E9=98=88=E5=80=BC=E7=AE=A1=E7=90=86?=
 =?UTF-8?q?API?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 新增GetMetricThresholds和UpdateMetricThresholds接口
- 支持配置SLA、延迟P99、TTFT P99、请求错误率、上游错误率阈值
- 添加参数验证逻辑
- 提供默认阈值配置
---
 .../handler/admin/ops_settings_handler.go     | 47 ++++++++++
 backend/internal/server/routes/admin.go       |  7 ++
 backend/internal/service/ops_settings.go      | 91 +++++++++++++++++++
 .../internal/service/ops_settings_models.go   |  9 ++
 4 files changed, 154 insertions(+)

diff --git a/backend/internal/handler/admin/ops_settings_handler.go b/backend/internal/handler/admin/ops_settings_handler.go
index 0e0ecb72..982836d0 100644
--- a/backend/internal/handler/admin/ops_settings_handler.go
+++ b/backend/internal/handler/admin/ops_settings_handler.go
@@ -146,3 +146,50 @@ func (h *OpsHandler) UpdateAdvancedSettings(c *gin.Context) {
 	}
 	response.Success(c, updated)
 }
+
+// GetMetricThresholds returns Ops metric thresholds (DB-backed) once the nil-service and monitoring-enabled checks pass.
+// GET /api/v1/admin/ops/settings/metric-thresholds
+func (h *OpsHandler) GetMetricThresholds(c *gin.Context) {
+	if h.opsService == nil {
+		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
+		return
+	}
+	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	cfg, err := h.opsService.GetMetricThresholds(c.Request.Context())
+	if err != nil {
+		response.Error(c, http.StatusInternalServerError, "Failed to get metric thresholds")
+		return
+	}
+	response.Success(c, cfg)
+}
+
+// UpdateMetricThresholds binds the JSON body and updates Ops metric thresholds (DB-backed); any service error is reported as 400.
+// PUT /api/v1/admin/ops/settings/metric-thresholds
+func (h *OpsHandler) UpdateMetricThresholds(c *gin.Context) {
+	if h.opsService == nil {
+		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
+		return
+	}
+	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
+		response.ErrorFrom(c, err)
+		return
+	}
+
+	var req service.OpsMetricThresholds
+	if err := c.ShouldBindJSON(&req); err != nil {
+		response.BadRequest(c, "Invalid request body")
+		return
+	}
+
+	updated, err := h.opsService.UpdateMetricThresholds(c.Request.Context(), &req)
+	if err != nil {
+		response.Error(c, http.StatusBadRequest, err.Error())
+		return
+	}
+	response.Success(c, updated)
+}
+
diff --git a/backend/internal/server/routes/admin.go b/backend/internal/server/routes/admin.go
index a2f1b8c7..98d621c0 100644
--- a/backend/internal/server/routes/admin.go
+++ b/backend/internal/server/routes/admin.go
@@ -96,6 +96,13 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		ops.GET("/advanced-settings", h.Admin.Ops.GetAdvancedSettings)
 		ops.PUT("/advanced-settings", h.Admin.Ops.UpdateAdvancedSettings)
 
+		// Settings group (DB-backed): read and update endpoints for metric thresholds
+		settings := ops.Group("/settings")
+		{
+			settings.GET("/metric-thresholds", h.Admin.Ops.GetMetricThresholds)
+			settings.PUT("/metric-thresholds", h.Admin.Ops.UpdateMetricThresholds)
+		}
+
 		// WebSocket realtime (QPS/TPS)
 		ws := ops.Group("/ws")
 		{
diff --git a/backend/internal/service/ops_settings.go b/backend/internal/service/ops_settings.go
index fbf8f069..3252ec20 100644
--- a/backend/internal/service/ops_settings.go
+++ b/backend/internal/service/ops_settings.go
@@ -463,3 +463,94 @@ func (s *OpsService) UpdateOpsAdvancedSettings(ctx context.Context, cfg *OpsAdva
 	_ = json.Unmarshal(raw, updated)
 	return updated, nil
 }
+
+// =========================
+// Metric thresholds (defaults, read with fallback to defaults, validated update)
+// =========================
+
+const SettingKeyOpsMetricThresholds = "ops_metric_thresholds"
+
+func defaultOpsMetricThresholds() *OpsMetricThresholds {
+	slaMin := 99.5
+	latencyMax := 2000.0
+	ttftMax := 500.0
+	reqErrMax := 5.0
+	upstreamErrMax := 5.0
+	return &OpsMetricThresholds{
+		SLAPercentMin:               &slaMin,
+		LatencyP99MsMax:             &latencyMax,
+		TTFTp99MsMax:                &ttftMax,
+		RequestErrorRatePercentMax:  &reqErrMax,
+		UpstreamErrorRatePercentMax: &upstreamErrMax,
+	}
+}
+
+func (s *OpsService) GetMetricThresholds(ctx context.Context) (*OpsMetricThresholds, error) {
+	defaultCfg := defaultOpsMetricThresholds()
+	if s == nil || s.settingRepo == nil {
+		return defaultCfg, nil
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsMetricThresholds)
+	if err != nil {
+		if errors.Is(err, ErrSettingNotFound) {
+			if b, mErr := json.Marshal(defaultCfg); mErr == nil {
+				_ = s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(b))
+			}
+			return defaultCfg, nil
+		}
+		return nil, err
+	}
+
+	cfg := &OpsMetricThresholds{}
+	if err := json.Unmarshal([]byte(raw), cfg); err != nil {
+		return defaultCfg, nil
+	}
+
+	return cfg, nil
+}
+
+func (s *OpsService) UpdateMetricThresholds(ctx context.Context, cfg *OpsMetricThresholds) (*OpsMetricThresholds, error) {
+	if s == nil || s.settingRepo == nil {
+		return nil, errors.New("setting repository not initialized")
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	if cfg == nil {
+		return nil, errors.New("invalid config")
+	}
+
+	// Validate thresholds: percentages must lie in [0, 100], latencies must be >= 0; nil fields are skipped.
+	if cfg.SLAPercentMin != nil && (*cfg.SLAPercentMin < 0 || *cfg.SLAPercentMin > 100) {
+		return nil, errors.New("sla_percent_min must be between 0 and 100")
+	}
+	if cfg.LatencyP99MsMax != nil && *cfg.LatencyP99MsMax < 0 {
+		return nil, errors.New("latency_p99_ms_max must be >= 0")
+	}
+	if cfg.TTFTp99MsMax != nil && *cfg.TTFTp99MsMax < 0 {
+		return nil, errors.New("ttft_p99_ms_max must be >= 0")
+	}
+	if cfg.RequestErrorRatePercentMax != nil && (*cfg.RequestErrorRatePercentMax < 0 || *cfg.RequestErrorRatePercentMax > 100) {
+		return nil, errors.New("request_error_rate_percent_max must be between 0 and 100")
+	}
+	if cfg.UpstreamErrorRatePercentMax != nil && (*cfg.UpstreamErrorRatePercentMax < 0 || *cfg.UpstreamErrorRatePercentMax > 100) {
+		return nil, errors.New("upstream_error_rate_percent_max must be between 0 and 100")
+	}
+
+	raw, err := json.Marshal(cfg)
+	if err != nil {
+		return nil, err
+	}
+	if err := s.settingRepo.Set(ctx, SettingKeyOpsMetricThresholds, string(raw)); err != nil {
+		return nil, err
+	}
+
+	updated := &OpsMetricThresholds{}
+	_ = json.Unmarshal(raw, updated)
+	return updated, nil
+}
+
diff --git a/backend/internal/service/ops_settings_models.go b/backend/internal/service/ops_settings_models.go
index 7d9a823c..a6fef95e 100644
--- a/backend/internal/service/ops_settings_models.go
+++ b/backend/internal/service/ops_settings_models.go
@@ -61,11 +61,20 @@ type OpsAlertSilencingSettings struct {
 	Entries []OpsAlertSilenceEntry `json:"entries,omitempty"`
 }
 
+type OpsMetricThresholds struct {
+	SLAPercentMin               *float64 `json:"sla_percent_min,omitempty"`                 // SLA below this value turns red
+	LatencyP99MsMax             *float64 `json:"latency_p99_ms_max,omitempty"`              // latency P99 above this value turns red
+	TTFTp99MsMax                *float64 `json:"ttft_p99_ms_max,omitempty"`                 // TTFT P99 above this value turns red
+	RequestErrorRatePercentMax  *float64 `json:"request_error_rate_percent_max,omitempty"`  // request error rate above this value turns red
+	UpstreamErrorRatePercentMax *float64 `json:"upstream_error_rate_percent_max,omitempty"` // upstream error rate above this value turns red
+}
+
 type OpsAlertRuntimeSettings struct {
 	EvaluationIntervalSeconds int `json:"evaluation_interval_seconds"`
 
 	DistributedLock OpsDistributedLockSettings `json:"distributed_lock"`
 	Silencing       OpsAlertSilencingSettings  `json:"silencing"`
+	Thresholds      OpsMetricThresholds        `json:"thresholds"` // metric threshold configuration
 }
 
 // OpsAdvancedSettings stores advanced ops configuration (data retention, aggregation).