- 修复depguard错误:为ops service文件添加redis导入例外
- 修复errcheck错误:添加错误检查和类型断言检查
- 修复gofmt错误:格式化代码
- 修复ineffassign错误:移除无效的idx++赋值
- 修复staticcheck错误:合并条件赋值
- 修复unused错误:移除未使用的字段和函数
  - ops_cleanup_service.go: entryID字段
  - ops_retry.go: status字段
  - ops_upstream_context.go: getOpsUpstreamErrors函数
433 lines
12 KiB
Go
433 lines
12 KiB
Go
package admin
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"math"
|
|
"net/http"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/Wei-Shaw/sub2api/internal/pkg/response"
|
|
"github.com/Wei-Shaw/sub2api/internal/service"
|
|
"github.com/gin-gonic/gin"
|
|
"github.com/gin-gonic/gin/binding"
|
|
)
|
|
|
|
// validOpsAlertMetricTypes lists every metric_type value accepted for an ops
// alert rule, in the order used when the list is shown in error messages.
var validOpsAlertMetricTypes = []string{
	// Rates.
	"success_rate",
	"error_rate",
	"upstream_error_rate",
	// Latency.
	"p95_latency_ms",
	"p99_latency_ms",
	// Resource usage.
	"cpu_usage_percent",
	"memory_usage_percent",
	"concurrency_queue_depth",
}

// validOpsAlertMetricTypeSet is a membership index over
// validOpsAlertMetricTypes, built once at package initialization.
var validOpsAlertMetricTypeSet = func() map[string]struct{} {
	lookup := make(map[string]struct{}, len(validOpsAlertMetricTypes))
	for i := range validOpsAlertMetricTypes {
		lookup[validOpsAlertMetricTypes[i]] = struct{}{}
	}
	return lookup
}()
|
|
|
|
// validOpsAlertOperators lists the comparison operators accepted for an ops
// alert rule, in the order used when the list is shown in error messages.
var validOpsAlertOperators = []string{
	">",
	"<",
	">=",
	"<=",
	"==",
	"!=",
}

// validOpsAlertOperatorSet is a membership index over validOpsAlertOperators,
// built once at package initialization.
var validOpsAlertOperatorSet = func() map[string]struct{} {
	lookup := make(map[string]struct{}, len(validOpsAlertOperators))
	for i := range validOpsAlertOperators {
		lookup[validOpsAlertOperators[i]] = struct{}{}
	}
	return lookup
}()
|
|
|
|
// validOpsAlertSeverities lists the severity labels accepted for an ops alert
// rule, in the order used when the list is shown in error messages.
var validOpsAlertSeverities = []string{
	"P0",
	"P1",
	"P2",
	"P3",
}

// validOpsAlertSeveritySet is a membership index over validOpsAlertSeverities,
// built once at package initialization.
var validOpsAlertSeveritySet = func() map[string]struct{} {
	lookup := make(map[string]struct{}, len(validOpsAlertSeverities))
	for i := range validOpsAlertSeverities {
		lookup[validOpsAlertSeverities[i]] = struct{}{}
	}
	return lookup
}()
|
|
|
|
// opsAlertRuleValidatedInput carries the normalized result of validating an
// alert-rule request payload.
type opsAlertRuleValidatedInput struct {
	// Core rule definition; all four are required in the payload.
	Name, MetricType, Operator string
	Threshold                  float64

	// Severity label (one of validOpsAlertSeverities).
	Severity string

	// Timing settings, expressed in minutes.
	WindowMinutes, SustainedMinutes, CooldownMinutes int

	// Rule toggles.
	Enabled, NotifyEmail bool

	// Presence flags: true when the corresponding optional key appeared in
	// the request payload, false when the value above is a default.
	WindowProvided, SustainedProvided, CooldownProvided bool
	SeverityProvided, EnabledProvided, NotifyProvided   bool
}
|
|
|
|
// isPercentOrRateMetric reports whether metricType names one of the rate or
// percentage metrics (success_rate, error_rate, upstream_error_rate,
// cpu_usage_percent, memory_usage_percent). Any other value, including the
// empty string, yields false.
func isPercentOrRateMetric(metricType string) bool {
	switch metricType {
	case "success_rate", "error_rate", "upstream_error_rate":
		return true
	case "cpu_usage_percent", "memory_usage_percent":
		return true
	}
	return false
}
|
|
|
|
func validateOpsAlertRulePayload(raw map[string]json.RawMessage) (*opsAlertRuleValidatedInput, error) {
|
|
if raw == nil {
|
|
return nil, fmt.Errorf("invalid request body")
|
|
}
|
|
|
|
requiredFields := []string{"name", "metric_type", "operator", "threshold"}
|
|
for _, field := range requiredFields {
|
|
if _, ok := raw[field]; !ok {
|
|
return nil, fmt.Errorf("%s is required", field)
|
|
}
|
|
}
|
|
|
|
var name string
|
|
if err := json.Unmarshal(raw["name"], &name); err != nil || strings.TrimSpace(name) == "" {
|
|
return nil, fmt.Errorf("name is required")
|
|
}
|
|
name = strings.TrimSpace(name)
|
|
|
|
var metricType string
|
|
if err := json.Unmarshal(raw["metric_type"], &metricType); err != nil || strings.TrimSpace(metricType) == "" {
|
|
return nil, fmt.Errorf("metric_type is required")
|
|
}
|
|
metricType = strings.TrimSpace(metricType)
|
|
if _, ok := validOpsAlertMetricTypeSet[metricType]; !ok {
|
|
return nil, fmt.Errorf("metric_type must be one of: %s", strings.Join(validOpsAlertMetricTypes, ", "))
|
|
}
|
|
|
|
var operator string
|
|
if err := json.Unmarshal(raw["operator"], &operator); err != nil || strings.TrimSpace(operator) == "" {
|
|
return nil, fmt.Errorf("operator is required")
|
|
}
|
|
operator = strings.TrimSpace(operator)
|
|
if _, ok := validOpsAlertOperatorSet[operator]; !ok {
|
|
return nil, fmt.Errorf("operator must be one of: %s", strings.Join(validOpsAlertOperators, ", "))
|
|
}
|
|
|
|
var threshold float64
|
|
if err := json.Unmarshal(raw["threshold"], &threshold); err != nil {
|
|
return nil, fmt.Errorf("threshold must be a number")
|
|
}
|
|
if math.IsNaN(threshold) || math.IsInf(threshold, 0) {
|
|
return nil, fmt.Errorf("threshold must be a finite number")
|
|
}
|
|
if isPercentOrRateMetric(metricType) {
|
|
if threshold < 0 || threshold > 100 {
|
|
return nil, fmt.Errorf("threshold must be between 0 and 100 for metric_type %s", metricType)
|
|
}
|
|
} else if threshold < 0 {
|
|
return nil, fmt.Errorf("threshold must be >= 0")
|
|
}
|
|
|
|
validated := &opsAlertRuleValidatedInput{
|
|
Name: name,
|
|
MetricType: metricType,
|
|
Operator: operator,
|
|
Threshold: threshold,
|
|
}
|
|
|
|
if v, ok := raw["severity"]; ok {
|
|
validated.SeverityProvided = true
|
|
var sev string
|
|
if err := json.Unmarshal(v, &sev); err != nil {
|
|
return nil, fmt.Errorf("severity must be a string")
|
|
}
|
|
sev = strings.ToUpper(strings.TrimSpace(sev))
|
|
if sev != "" {
|
|
if _, ok := validOpsAlertSeveritySet[sev]; !ok {
|
|
return nil, fmt.Errorf("severity must be one of: %s", strings.Join(validOpsAlertSeverities, ", "))
|
|
}
|
|
validated.Severity = sev
|
|
}
|
|
}
|
|
if validated.Severity == "" {
|
|
validated.Severity = "P2"
|
|
}
|
|
|
|
if v, ok := raw["enabled"]; ok {
|
|
validated.EnabledProvided = true
|
|
if err := json.Unmarshal(v, &validated.Enabled); err != nil {
|
|
return nil, fmt.Errorf("enabled must be a boolean")
|
|
}
|
|
} else {
|
|
validated.Enabled = true
|
|
}
|
|
|
|
if v, ok := raw["notify_email"]; ok {
|
|
validated.NotifyProvided = true
|
|
if err := json.Unmarshal(v, &validated.NotifyEmail); err != nil {
|
|
return nil, fmt.Errorf("notify_email must be a boolean")
|
|
}
|
|
} else {
|
|
validated.NotifyEmail = true
|
|
}
|
|
|
|
if v, ok := raw["window_minutes"]; ok {
|
|
validated.WindowProvided = true
|
|
if err := json.Unmarshal(v, &validated.WindowMinutes); err != nil {
|
|
return nil, fmt.Errorf("window_minutes must be an integer")
|
|
}
|
|
switch validated.WindowMinutes {
|
|
case 1, 5, 60:
|
|
default:
|
|
return nil, fmt.Errorf("window_minutes must be one of: 1, 5, 60")
|
|
}
|
|
} else {
|
|
validated.WindowMinutes = 1
|
|
}
|
|
|
|
if v, ok := raw["sustained_minutes"]; ok {
|
|
validated.SustainedProvided = true
|
|
if err := json.Unmarshal(v, &validated.SustainedMinutes); err != nil {
|
|
return nil, fmt.Errorf("sustained_minutes must be an integer")
|
|
}
|
|
if validated.SustainedMinutes < 1 || validated.SustainedMinutes > 1440 {
|
|
return nil, fmt.Errorf("sustained_minutes must be between 1 and 1440")
|
|
}
|
|
} else {
|
|
validated.SustainedMinutes = 1
|
|
}
|
|
|
|
if v, ok := raw["cooldown_minutes"]; ok {
|
|
validated.CooldownProvided = true
|
|
if err := json.Unmarshal(v, &validated.CooldownMinutes); err != nil {
|
|
return nil, fmt.Errorf("cooldown_minutes must be an integer")
|
|
}
|
|
if validated.CooldownMinutes < 0 || validated.CooldownMinutes > 1440 {
|
|
return nil, fmt.Errorf("cooldown_minutes must be between 0 and 1440")
|
|
}
|
|
} else {
|
|
validated.CooldownMinutes = 0
|
|
}
|
|
|
|
return validated, nil
|
|
}
|
|
|
|
// ListAlertRules returns all ops alert rules.
|
|
// GET /api/v1/admin/ops/alert-rules
|
|
func (h *OpsHandler) ListAlertRules(c *gin.Context) {
|
|
if h.opsService == nil {
|
|
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
|
return
|
|
}
|
|
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
|
|
rules, err := h.opsService.ListAlertRules(c.Request.Context())
|
|
if err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
response.Success(c, rules)
|
|
}
|
|
|
|
// CreateAlertRule creates an ops alert rule.
|
|
// POST /api/v1/admin/ops/alert-rules
|
|
func (h *OpsHandler) CreateAlertRule(c *gin.Context) {
|
|
if h.opsService == nil {
|
|
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
|
return
|
|
}
|
|
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
|
|
var raw map[string]json.RawMessage
|
|
if err := c.ShouldBindBodyWith(&raw, binding.JSON); err != nil {
|
|
response.BadRequest(c, "Invalid request body")
|
|
return
|
|
}
|
|
validated, err := validateOpsAlertRulePayload(raw)
|
|
if err != nil {
|
|
response.BadRequest(c, err.Error())
|
|
return
|
|
}
|
|
|
|
var rule service.OpsAlertRule
|
|
if err := c.ShouldBindBodyWith(&rule, binding.JSON); err != nil {
|
|
response.BadRequest(c, "Invalid request body")
|
|
return
|
|
}
|
|
|
|
rule.Name = validated.Name
|
|
rule.MetricType = validated.MetricType
|
|
rule.Operator = validated.Operator
|
|
rule.Threshold = validated.Threshold
|
|
rule.WindowMinutes = validated.WindowMinutes
|
|
rule.SustainedMinutes = validated.SustainedMinutes
|
|
rule.CooldownMinutes = validated.CooldownMinutes
|
|
rule.Severity = validated.Severity
|
|
rule.Enabled = validated.Enabled
|
|
rule.NotifyEmail = validated.NotifyEmail
|
|
|
|
created, err := h.opsService.CreateAlertRule(c.Request.Context(), &rule)
|
|
if err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
response.Success(c, created)
|
|
}
|
|
|
|
// UpdateAlertRule updates an existing ops alert rule.
|
|
// PUT /api/v1/admin/ops/alert-rules/:id
|
|
func (h *OpsHandler) UpdateAlertRule(c *gin.Context) {
|
|
if h.opsService == nil {
|
|
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
|
return
|
|
}
|
|
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
|
|
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
|
if err != nil || id <= 0 {
|
|
response.BadRequest(c, "Invalid rule ID")
|
|
return
|
|
}
|
|
|
|
var raw map[string]json.RawMessage
|
|
if err := c.ShouldBindBodyWith(&raw, binding.JSON); err != nil {
|
|
response.BadRequest(c, "Invalid request body")
|
|
return
|
|
}
|
|
validated, err := validateOpsAlertRulePayload(raw)
|
|
if err != nil {
|
|
response.BadRequest(c, err.Error())
|
|
return
|
|
}
|
|
|
|
var rule service.OpsAlertRule
|
|
if err := c.ShouldBindBodyWith(&rule, binding.JSON); err != nil {
|
|
response.BadRequest(c, "Invalid request body")
|
|
return
|
|
}
|
|
|
|
rule.ID = id
|
|
rule.Name = validated.Name
|
|
rule.MetricType = validated.MetricType
|
|
rule.Operator = validated.Operator
|
|
rule.Threshold = validated.Threshold
|
|
rule.WindowMinutes = validated.WindowMinutes
|
|
rule.SustainedMinutes = validated.SustainedMinutes
|
|
rule.CooldownMinutes = validated.CooldownMinutes
|
|
rule.Severity = validated.Severity
|
|
rule.Enabled = validated.Enabled
|
|
rule.NotifyEmail = validated.NotifyEmail
|
|
|
|
updated, err := h.opsService.UpdateAlertRule(c.Request.Context(), &rule)
|
|
if err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
response.Success(c, updated)
|
|
}
|
|
|
|
// DeleteAlertRule deletes an ops alert rule.
|
|
// DELETE /api/v1/admin/ops/alert-rules/:id
|
|
func (h *OpsHandler) DeleteAlertRule(c *gin.Context) {
|
|
if h.opsService == nil {
|
|
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
|
return
|
|
}
|
|
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
|
|
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
|
if err != nil || id <= 0 {
|
|
response.BadRequest(c, "Invalid rule ID")
|
|
return
|
|
}
|
|
|
|
if err := h.opsService.DeleteAlertRule(c.Request.Context(), id); err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
response.Success(c, gin.H{"deleted": true})
|
|
}
|
|
|
|
// ListAlertEvents lists recent ops alert events.
|
|
// GET /api/v1/admin/ops/alert-events
|
|
func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
|
|
if h.opsService == nil {
|
|
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
|
return
|
|
}
|
|
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
|
|
limit := 100
|
|
if raw := strings.TrimSpace(c.Query("limit")); raw != "" {
|
|
n, err := strconv.Atoi(raw)
|
|
if err != nil || n <= 0 {
|
|
response.BadRequest(c, "Invalid limit")
|
|
return
|
|
}
|
|
limit = n
|
|
}
|
|
|
|
filter := &service.OpsAlertEventFilter{
|
|
Limit: limit,
|
|
Status: strings.TrimSpace(c.Query("status")),
|
|
Severity: strings.TrimSpace(c.Query("severity")),
|
|
}
|
|
|
|
// Optional global filter support (platform/group/time range).
|
|
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
|
|
filter.Platform = platform
|
|
}
|
|
if v := strings.TrimSpace(c.Query("group_id")); v != "" {
|
|
id, err := strconv.ParseInt(v, 10, 64)
|
|
if err != nil || id <= 0 {
|
|
response.BadRequest(c, "Invalid group_id")
|
|
return
|
|
}
|
|
filter.GroupID = &id
|
|
}
|
|
if startTime, endTime, err := parseOpsTimeRange(c, "24h"); err == nil {
|
|
// Only apply when explicitly provided to avoid surprising default narrowing.
|
|
if strings.TrimSpace(c.Query("start_time")) != "" || strings.TrimSpace(c.Query("end_time")) != "" || strings.TrimSpace(c.Query("time_range")) != "" {
|
|
filter.StartTime = &startTime
|
|
filter.EndTime = &endTime
|
|
}
|
|
} else {
|
|
response.BadRequest(c, err.Error())
|
|
return
|
|
}
|
|
|
|
events, err := h.opsService.ListAlertEvents(c.Request.Context(), filter)
|
|
if err != nil {
|
|
response.ErrorFrom(c, err)
|
|
return
|
|
}
|
|
response.Success(c, events)
|
|
}
|