feat(handler): 实现运维监控 API 处理器和中间件
- 新增 ops 错误日志记录器(ops_error_logger.go) - 新增 ops 主处理器(ops_handler.go) - 新增告警管理处理器(ops_alerts_handler.go) - 新增仪表板处理器(ops_dashboard_handler.go) - 新增实时监控处理器(ops_realtime_handler.go) - 新增配置管理处理器(ops_settings_handler.go) - 新增 WebSocket 处理器(ops_ws_handler.go) - 扩展设置 DTO 支持 ops 配置 - 新增客户端请求 ID 中间件(client_request_id.go) - 新增 WebSocket 查询令牌认证中间件(ws_query_token_auth.go) - 更新管理员认证中间件支持 ops 路由 - 注册 handler 依赖注入
This commit is contained in:
433
backend/internal/handler/admin/ops_alerts_handler.go
Normal file
433
backend/internal/handler/admin/ops_alerts_handler.go
Normal file
@@ -0,0 +1,433 @@
|
||||
package admin
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"math"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/response"
|
||||
"github.com/Wei-Shaw/sub2api/internal/service"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/gin-gonic/gin/binding"
|
||||
)
|
||||
|
||||
var validOpsAlertMetricTypes = []string{
|
||||
"success_rate",
|
||||
"error_rate",
|
||||
"upstream_error_rate",
|
||||
"p95_latency_ms",
|
||||
"p99_latency_ms",
|
||||
"cpu_usage_percent",
|
||||
"memory_usage_percent",
|
||||
"concurrency_queue_depth",
|
||||
}
|
||||
|
||||
var validOpsAlertMetricTypeSet = func() map[string]struct{} {
|
||||
set := make(map[string]struct{}, len(validOpsAlertMetricTypes))
|
||||
for _, v := range validOpsAlertMetricTypes {
|
||||
set[v] = struct{}{}
|
||||
}
|
||||
return set
|
||||
}()
|
||||
|
||||
var validOpsAlertOperators = []string{">", "<", ">=", "<=", "==", "!="}
|
||||
|
||||
var validOpsAlertOperatorSet = func() map[string]struct{} {
|
||||
set := make(map[string]struct{}, len(validOpsAlertOperators))
|
||||
for _, v := range validOpsAlertOperators {
|
||||
set[v] = struct{}{}
|
||||
}
|
||||
return set
|
||||
}()
|
||||
|
||||
var validOpsAlertSeverities = []string{"P0", "P1", "P2", "P3"}
|
||||
|
||||
var validOpsAlertSeveritySet = func() map[string]struct{} {
|
||||
set := make(map[string]struct{}, len(validOpsAlertSeverities))
|
||||
for _, v := range validOpsAlertSeverities {
|
||||
set[v] = struct{}{}
|
||||
}
|
||||
return set
|
||||
}()
|
||||
|
||||
type opsAlertRuleValidatedInput struct {
|
||||
Name string
|
||||
MetricType string
|
||||
Operator string
|
||||
Threshold float64
|
||||
|
||||
Severity string
|
||||
|
||||
WindowMinutes int
|
||||
SustainedMinutes int
|
||||
CooldownMinutes int
|
||||
|
||||
Enabled bool
|
||||
NotifyEmail bool
|
||||
|
||||
WindowProvided bool
|
||||
SustainedProvided bool
|
||||
CooldownProvided bool
|
||||
SeverityProvided bool
|
||||
EnabledProvided bool
|
||||
NotifyProvided bool
|
||||
}
|
||||
|
||||
func isPercentOrRateMetric(metricType string) bool {
|
||||
switch metricType {
|
||||
case "success_rate",
|
||||
"error_rate",
|
||||
"upstream_error_rate",
|
||||
"cpu_usage_percent",
|
||||
"memory_usage_percent":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
func validateOpsAlertRulePayload(raw map[string]json.RawMessage) (*opsAlertRuleValidatedInput, error) {
|
||||
if raw == nil {
|
||||
return nil, fmt.Errorf("invalid request body")
|
||||
}
|
||||
|
||||
requiredFields := []string{"name", "metric_type", "operator", "threshold"}
|
||||
for _, field := range requiredFields {
|
||||
if _, ok := raw[field]; !ok {
|
||||
return nil, fmt.Errorf("%s is required", field)
|
||||
}
|
||||
}
|
||||
|
||||
var name string
|
||||
if err := json.Unmarshal(raw["name"], &name); err != nil || strings.TrimSpace(name) == "" {
|
||||
return nil, fmt.Errorf("name is required")
|
||||
}
|
||||
name = strings.TrimSpace(name)
|
||||
|
||||
var metricType string
|
||||
if err := json.Unmarshal(raw["metric_type"], &metricType); err != nil || strings.TrimSpace(metricType) == "" {
|
||||
return nil, fmt.Errorf("metric_type is required")
|
||||
}
|
||||
metricType = strings.TrimSpace(metricType)
|
||||
if _, ok := validOpsAlertMetricTypeSet[metricType]; !ok {
|
||||
return nil, fmt.Errorf("metric_type must be one of: %s", strings.Join(validOpsAlertMetricTypes, ", "))
|
||||
}
|
||||
|
||||
var operator string
|
||||
if err := json.Unmarshal(raw["operator"], &operator); err != nil || strings.TrimSpace(operator) == "" {
|
||||
return nil, fmt.Errorf("operator is required")
|
||||
}
|
||||
operator = strings.TrimSpace(operator)
|
||||
if _, ok := validOpsAlertOperatorSet[operator]; !ok {
|
||||
return nil, fmt.Errorf("operator must be one of: %s", strings.Join(validOpsAlertOperators, ", "))
|
||||
}
|
||||
|
||||
var threshold float64
|
||||
if err := json.Unmarshal(raw["threshold"], &threshold); err != nil {
|
||||
return nil, fmt.Errorf("threshold must be a number")
|
||||
}
|
||||
if math.IsNaN(threshold) || math.IsInf(threshold, 0) {
|
||||
return nil, fmt.Errorf("threshold must be a finite number")
|
||||
}
|
||||
if isPercentOrRateMetric(metricType) {
|
||||
if threshold < 0 || threshold > 100 {
|
||||
return nil, fmt.Errorf("threshold must be between 0 and 100 for metric_type %s", metricType)
|
||||
}
|
||||
} else if threshold < 0 {
|
||||
return nil, fmt.Errorf("threshold must be >= 0")
|
||||
}
|
||||
|
||||
validated := &opsAlertRuleValidatedInput{
|
||||
Name: name,
|
||||
MetricType: metricType,
|
||||
Operator: operator,
|
||||
Threshold: threshold,
|
||||
}
|
||||
|
||||
if v, ok := raw["severity"]; ok {
|
||||
validated.SeverityProvided = true
|
||||
var sev string
|
||||
if err := json.Unmarshal(v, &sev); err != nil {
|
||||
return nil, fmt.Errorf("severity must be a string")
|
||||
}
|
||||
sev = strings.ToUpper(strings.TrimSpace(sev))
|
||||
if sev != "" {
|
||||
if _, ok := validOpsAlertSeveritySet[sev]; !ok {
|
||||
return nil, fmt.Errorf("severity must be one of: %s", strings.Join(validOpsAlertSeverities, ", "))
|
||||
}
|
||||
validated.Severity = sev
|
||||
}
|
||||
}
|
||||
if validated.Severity == "" {
|
||||
validated.Severity = "P2"
|
||||
}
|
||||
|
||||
if v, ok := raw["enabled"]; ok {
|
||||
validated.EnabledProvided = true
|
||||
if err := json.Unmarshal(v, &validated.Enabled); err != nil {
|
||||
return nil, fmt.Errorf("enabled must be a boolean")
|
||||
}
|
||||
} else {
|
||||
validated.Enabled = true
|
||||
}
|
||||
|
||||
if v, ok := raw["notify_email"]; ok {
|
||||
validated.NotifyProvided = true
|
||||
if err := json.Unmarshal(v, &validated.NotifyEmail); err != nil {
|
||||
return nil, fmt.Errorf("notify_email must be a boolean")
|
||||
}
|
||||
} else {
|
||||
validated.NotifyEmail = true
|
||||
}
|
||||
|
||||
if v, ok := raw["window_minutes"]; ok {
|
||||
validated.WindowProvided = true
|
||||
if err := json.Unmarshal(v, &validated.WindowMinutes); err != nil {
|
||||
return nil, fmt.Errorf("window_minutes must be an integer")
|
||||
}
|
||||
switch validated.WindowMinutes {
|
||||
case 1, 5, 60:
|
||||
default:
|
||||
return nil, fmt.Errorf("window_minutes must be one of: 1, 5, 60")
|
||||
}
|
||||
} else {
|
||||
validated.WindowMinutes = 1
|
||||
}
|
||||
|
||||
if v, ok := raw["sustained_minutes"]; ok {
|
||||
validated.SustainedProvided = true
|
||||
if err := json.Unmarshal(v, &validated.SustainedMinutes); err != nil {
|
||||
return nil, fmt.Errorf("sustained_minutes must be an integer")
|
||||
}
|
||||
if validated.SustainedMinutes < 1 || validated.SustainedMinutes > 1440 {
|
||||
return nil, fmt.Errorf("sustained_minutes must be between 1 and 1440")
|
||||
}
|
||||
} else {
|
||||
validated.SustainedMinutes = 1
|
||||
}
|
||||
|
||||
if v, ok := raw["cooldown_minutes"]; ok {
|
||||
validated.CooldownProvided = true
|
||||
if err := json.Unmarshal(v, &validated.CooldownMinutes); err != nil {
|
||||
return nil, fmt.Errorf("cooldown_minutes must be an integer")
|
||||
}
|
||||
if validated.CooldownMinutes < 0 || validated.CooldownMinutes > 1440 {
|
||||
return nil, fmt.Errorf("cooldown_minutes must be between 0 and 1440")
|
||||
}
|
||||
} else {
|
||||
validated.CooldownMinutes = 0
|
||||
}
|
||||
|
||||
return validated, nil
|
||||
}
|
||||
|
||||
// ListAlertRules returns all ops alert rules.
|
||||
// GET /api/v1/admin/ops/alert-rules
|
||||
func (h *OpsHandler) ListAlertRules(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
rules, err := h.opsService.ListAlertRules(c.Request.Context())
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, rules)
|
||||
}
|
||||
|
||||
// CreateAlertRule creates an ops alert rule.
|
||||
// POST /api/v1/admin/ops/alert-rules
|
||||
func (h *OpsHandler) CreateAlertRule(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
var raw map[string]json.RawMessage
|
||||
if err := c.ShouldBindBodyWith(&raw, binding.JSON); err != nil {
|
||||
response.BadRequest(c, "Invalid request body")
|
||||
return
|
||||
}
|
||||
validated, err := validateOpsAlertRulePayload(raw)
|
||||
if err != nil {
|
||||
response.BadRequest(c, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var rule service.OpsAlertRule
|
||||
if err := c.ShouldBindBodyWith(&rule, binding.JSON); err != nil {
|
||||
response.BadRequest(c, "Invalid request body")
|
||||
return
|
||||
}
|
||||
|
||||
rule.Name = validated.Name
|
||||
rule.MetricType = validated.MetricType
|
||||
rule.Operator = validated.Operator
|
||||
rule.Threshold = validated.Threshold
|
||||
rule.WindowMinutes = validated.WindowMinutes
|
||||
rule.SustainedMinutes = validated.SustainedMinutes
|
||||
rule.CooldownMinutes = validated.CooldownMinutes
|
||||
rule.Severity = validated.Severity
|
||||
rule.Enabled = validated.Enabled
|
||||
rule.NotifyEmail = validated.NotifyEmail
|
||||
|
||||
created, err := h.opsService.CreateAlertRule(c.Request.Context(), &rule)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, created)
|
||||
}
|
||||
|
||||
// UpdateAlertRule updates an existing ops alert rule.
|
||||
// PUT /api/v1/admin/ops/alert-rules/:id
|
||||
func (h *OpsHandler) UpdateAlertRule(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid rule ID")
|
||||
return
|
||||
}
|
||||
|
||||
var raw map[string]json.RawMessage
|
||||
if err := c.ShouldBindBodyWith(&raw, binding.JSON); err != nil {
|
||||
response.BadRequest(c, "Invalid request body")
|
||||
return
|
||||
}
|
||||
validated, err := validateOpsAlertRulePayload(raw)
|
||||
if err != nil {
|
||||
response.BadRequest(c, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
var rule service.OpsAlertRule
|
||||
if err := c.ShouldBindBodyWith(&rule, binding.JSON); err != nil {
|
||||
response.BadRequest(c, "Invalid request body")
|
||||
return
|
||||
}
|
||||
|
||||
rule.ID = id
|
||||
rule.Name = validated.Name
|
||||
rule.MetricType = validated.MetricType
|
||||
rule.Operator = validated.Operator
|
||||
rule.Threshold = validated.Threshold
|
||||
rule.WindowMinutes = validated.WindowMinutes
|
||||
rule.SustainedMinutes = validated.SustainedMinutes
|
||||
rule.CooldownMinutes = validated.CooldownMinutes
|
||||
rule.Severity = validated.Severity
|
||||
rule.Enabled = validated.Enabled
|
||||
rule.NotifyEmail = validated.NotifyEmail
|
||||
|
||||
updated, err := h.opsService.UpdateAlertRule(c.Request.Context(), &rule)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, updated)
|
||||
}
|
||||
|
||||
// DeleteAlertRule deletes an ops alert rule.
|
||||
// DELETE /api/v1/admin/ops/alert-rules/:id
|
||||
func (h *OpsHandler) DeleteAlertRule(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid rule ID")
|
||||
return
|
||||
}
|
||||
|
||||
if err := h.opsService.DeleteAlertRule(c.Request.Context(), id); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, gin.H{"deleted": true})
|
||||
}
|
||||
|
||||
// ListAlertEvents lists recent ops alert events.
|
||||
// GET /api/v1/admin/ops/alert-events
|
||||
func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
limit := 100
|
||||
if raw := strings.TrimSpace(c.Query("limit")); raw != "" {
|
||||
n, err := strconv.Atoi(raw)
|
||||
if err != nil || n <= 0 {
|
||||
response.BadRequest(c, "Invalid limit")
|
||||
return
|
||||
}
|
||||
limit = n
|
||||
}
|
||||
|
||||
filter := &service.OpsAlertEventFilter{
|
||||
Limit: limit,
|
||||
Status: strings.TrimSpace(c.Query("status")),
|
||||
Severity: strings.TrimSpace(c.Query("severity")),
|
||||
}
|
||||
|
||||
// Optional global filter support (platform/group/time range).
|
||||
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
|
||||
filter.Platform = platform
|
||||
}
|
||||
if v := strings.TrimSpace(c.Query("group_id")); v != "" {
|
||||
id, err := strconv.ParseInt(v, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid group_id")
|
||||
return
|
||||
}
|
||||
filter.GroupID = &id
|
||||
}
|
||||
if startTime, endTime, err := parseOpsTimeRange(c, "24h"); err == nil {
|
||||
// Only apply when explicitly provided to avoid surprising default narrowing.
|
||||
if strings.TrimSpace(c.Query("start_time")) != "" || strings.TrimSpace(c.Query("end_time")) != "" || strings.TrimSpace(c.Query("time_range")) != "" {
|
||||
filter.StartTime = &startTime
|
||||
filter.EndTime = &endTime
|
||||
}
|
||||
} else {
|
||||
response.BadRequest(c, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
events, err := h.opsService.ListAlertEvents(c.Request.Context(), filter)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, events)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user