feat(handler): 新增ops管理接口和路由

- 添加告警静默管理接口
- 扩展错误日志查询和操作接口
- 新增重试和解决状态相关端点
- 完善错误日志记录功能
This commit is contained in:
IanShaw027
2026-01-14 09:03:35 +08:00
parent d601768016
commit 659df6e220
4 changed files with 296 additions and 14 deletions

View File

@@ -7,8 +7,10 @@ import (
"net/http"
"strconv"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/response"
"github.com/Wei-Shaw/sub2api/internal/server/middleware"
"github.com/Wei-Shaw/sub2api/internal/service"
"github.com/gin-gonic/gin"
"github.com/gin-gonic/gin/binding"
@@ -372,8 +374,135 @@ func (h *OpsHandler) DeleteAlertRule(c *gin.Context) {
response.Success(c, gin.H{"deleted": true})
}
// GetAlertEvent looks up one ops alert event by the numeric ID in the route
// and writes it as the success payload.
// GET /api/v1/admin/ops/alert-events/:id
//
// Responds with 503 when the ops service is not wired, with the service's own
// error when monitoring is disabled or the lookup fails, and with 400 when the
// ID is not a positive integer.
func (h *OpsHandler) GetAlertEvent(c *gin.Context) {
	if h.opsService == nil {
		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
		return
	}

	ctx := c.Request.Context()
	if monitorErr := h.opsService.RequireMonitoringEnabled(ctx); monitorErr != nil {
		response.ErrorFrom(c, monitorErr)
		return
	}

	eventID, parseErr := strconv.ParseInt(c.Param("id"), 10, 64)
	if parseErr != nil || eventID <= 0 {
		response.BadRequest(c, "Invalid event ID")
		return
	}

	event, lookupErr := h.opsService.GetAlertEventByID(ctx, eventID)
	if lookupErr != nil {
		response.ErrorFrom(c, lookupErr)
		return
	}
	response.Success(c, event)
}
// UpdateAlertEventStatus marks an ops alert event as resolved.
// PUT /api/v1/admin/ops/alert-events/:id/status
//
// The JSON body carries a single "status" field. After trimming whitespace it
// must equal service.OpsAlertStatusResolved or
// service.OpsAlertStatusManualResolved; anything else (including an empty
// value) is rejected with 400 "Invalid status". Because only resolving
// statuses are accepted, the resolution timestamp is always set to the current
// UTC time.
//
// Responds with 503 when the ops service is not wired, with the service's own
// error when monitoring is disabled or the update fails, and with 400 for a
// non-positive/non-numeric ID or an unparsable body.
func (h *OpsHandler) UpdateAlertEventStatus(c *gin.Context) {
	if h.opsService == nil {
		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
		return
	}
	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
		response.ErrorFrom(c, err)
		return
	}

	id, err := strconv.ParseInt(c.Param("id"), 10, 64)
	if err != nil || id <= 0 {
		response.BadRequest(c, "Invalid event ID")
		return
	}

	var payload struct {
		Status string `json:"status"`
	}
	if err := c.ShouldBindJSON(&payload); err != nil {
		response.BadRequest(c, "Invalid request body")
		return
	}

	// The allow-list check also covers the empty string, so no separate
	// emptiness test is needed.
	status := strings.TrimSpace(payload.Status)
	if status != service.OpsAlertStatusResolved && status != service.OpsAlertStatusManualResolved {
		response.BadRequest(c, "Invalid status")
		return
	}

	// Every status that passes validation is a resolving one, so the
	// timestamp is unconditional.
	resolvedAt := time.Now().UTC()
	if err := h.opsService.UpdateAlertEventStatus(c.Request.Context(), id, status, &resolvedAt); err != nil {
		response.ErrorFrom(c, err)
		return
	}
	response.Success(c, gin.H{"updated": true})
}
// CreateAlertSilence creates a scoped silence for ops alerts.
// POST /api/v1/admin/ops/alert-silences
//
// The JSON body supplies rule_id, platform, optional group_id and region, an
// RFC 3339 "until" timestamp, and a free-form reason. Platform, until and
// reason are whitespace-trimmed before use. When an authenticated subject is
// present on the request its user ID is recorded as the creator; otherwise the
// creator is left nil.
//
// Responds with 503 when the ops service is not wired, with the service's own
// error when monitoring is disabled or creation fails, and with 400 for an
// unparsable body or "until" value.
func (h *OpsHandler) CreateAlertSilence(c *gin.Context) {
	if h.opsService == nil {
		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
		return
	}
	if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
		response.ErrorFrom(c, err)
		return
	}

	var payload struct {
		RuleID   int64   `json:"rule_id"`
		Platform string  `json:"platform"`
		GroupID  *int64  `json:"group_id"`
		Region   *string `json:"region"`
		Until    string  `json:"until"`
		Reason   string  `json:"reason"`
	}
	if err := c.ShouldBindJSON(&payload); err != nil {
		response.BadRequest(c, "Invalid request body")
		return
	}

	until, err := time.Parse(time.RFC3339, strings.TrimSpace(payload.Until))
	if err != nil {
		response.BadRequest(c, "Invalid until")
		return
	}

	// The creator is optional: it stays nil when no auth subject is attached.
	var createdBy *int64
	if subject, ok := middleware.GetAuthSubjectFromContext(c); ok {
		uid := subject.UserID
		createdBy = &uid
	}

	silence := &service.OpsAlertSilence{
		RuleID:    payload.RuleID,
		Platform:  strings.TrimSpace(payload.Platform),
		GroupID:   payload.GroupID,
		Region:    payload.Region,
		Until:     until,
		Reason:    strings.TrimSpace(payload.Reason),
		CreatedBy: createdBy,
	}

	created, err := h.opsService.CreateAlertSilence(c.Request.Context(), silence)
	if err != nil {
		response.ErrorFrom(c, err)
		return
	}
	response.Success(c, created)
}

// ListAlertEvents lists recent ops alert events.
// GET /api/v1/admin/ops/alert-events
func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
if h.opsService == nil {
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
@@ -384,7 +513,7 @@ func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
return
}
limit := 100
limit := 20
if raw := strings.TrimSpace(c.Query("limit")); raw != "" {
n, err := strconv.Atoi(raw)
if err != nil || n <= 0 {
@@ -400,6 +529,43 @@ func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
Severity: strings.TrimSpace(c.Query("severity")),
}
if v := strings.TrimSpace(c.Query("email_sent")); v != "" {
vv := strings.ToLower(v)
switch vv {
case "true", "1":
b := true
filter.EmailSent = &b
case "false", "0":
b := false
filter.EmailSent = &b
default:
response.BadRequest(c, "Invalid email_sent")
return
}
}
// Cursor pagination
if rawTS := strings.TrimSpace(c.Query("before_fired_at")); rawTS != "" {
ts, err := time.Parse(time.RFC3339Nano, rawTS)
if err != nil {
if t2, err2 := time.Parse(time.RFC3339, rawTS); err2 == nil {
ts = t2
} else {
response.BadRequest(c, "Invalid before_fired_at")
return
}
}
filter.BeforeFiredAt = &ts
}
if rawID := strings.TrimSpace(c.Query("before_id")); rawID != "" {
id, err := strconv.ParseInt(rawID, 10, 64)
if err != nil || id <= 0 {
response.BadRequest(c, "Invalid before_id")
return
}
filter.BeforeID = &id
}
// Optional global filter support (platform/group/time range).
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
filter.Platform = platform

View File

@@ -80,6 +80,25 @@ func (h *OpsHandler) GetErrorLogs(c *gin.Context) {
if phase := strings.TrimSpace(c.Query("phase")); phase != "" {
filter.Phase = phase
}
if owner := strings.TrimSpace(c.Query("error_owner")); owner != "" {
filter.Owner = owner
}
if source := strings.TrimSpace(c.Query("error_source")); source != "" {
filter.Source = source
}
if v := strings.TrimSpace(c.Query("resolved")); v != "" {
switch strings.ToLower(v) {
case "1", "true", "yes":
b := true
filter.Resolved = &b
case "0", "false", "no":
b := false
filter.Resolved = &b
default:
response.BadRequest(c, "Invalid resolved")
return
}
}
if q := strings.TrimSpace(c.Query("q")); q != "" {
filter.Query = q
}
@@ -242,6 +261,11 @@ func (h *OpsHandler) ListRequestDetails(c *gin.Context) {
// opsRetryRequest is the JSON body accepted by the error-retry endpoint.
type opsRetryRequest struct {
	// Mode selects how the retry is performed; it is forwarded to the retry
	// call.
	Mode string `json:"mode"`
	// PinnedAccountID is optional and is forwarded to the retry call as-is.
	PinnedAccountID *int64 `json:"pinned_account_id"`
	// Force is a UI-level acknowledgement flag; the server may still enforce
	// its own safety constraints.
	Force bool `json:"force"`
}
// opsResolveRequest is the JSON body accepted by the manual
// resolve/unresolve endpoint for error logs.
type opsResolveRequest struct {
	// Resolved is the desired resolution state: true to resolve, false to
	// unresolve.
	Resolved bool `json:"resolved"`
}
// RetryErrorRequest retries a failed request using stored request_body.
@@ -278,6 +302,8 @@ func (h *OpsHandler) RetryErrorRequest(c *gin.Context) {
req.Mode = service.OpsRetryModeClient
}
// Force flag is currently a UI-level acknowledgement. Server may still enforce safety constraints.
_ = req.Force
result, err := h.opsService.RetryError(c.Request.Context(), subject.UserID, id, req.Mode, req.PinnedAccountID)
if err != nil {
response.ErrorFrom(c, err)
@@ -287,6 +313,81 @@ func (h *OpsHandler) RetryErrorRequest(c *gin.Context) {
response.Success(c, result)
}
// ListRetryAttempts returns the retry attempts recorded for one error log.
// GET /api/v1/admin/ops/errors/:id/retries
//
// The optional "limit" query parameter must be a positive integer and
// defaults to 50 when absent or blank.
//
// Responds with 503 when the ops service is not wired, with the service's own
// error when monitoring is disabled or the listing fails, and with 400 for an
// invalid error ID or limit.
func (h *OpsHandler) ListRetryAttempts(c *gin.Context) {
	if h.opsService == nil {
		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
		return
	}

	ctx := c.Request.Context()
	if monitorErr := h.opsService.RequireMonitoringEnabled(ctx); monitorErr != nil {
		response.ErrorFrom(c, monitorErr)
		return
	}

	errorID, parseErr := strconv.ParseInt(strings.TrimSpace(c.Param("id")), 10, 64)
	if parseErr != nil || errorID <= 0 {
		response.BadRequest(c, "Invalid error id")
		return
	}

	maxItems := 50
	if rawLimit := strings.TrimSpace(c.Query("limit")); rawLimit != "" {
		parsed, convErr := strconv.Atoi(rawLimit)
		if convErr != nil || parsed <= 0 {
			response.BadRequest(c, "Invalid limit")
			return
		}
		maxItems = parsed
	}

	attempts, listErr := h.opsService.ListRetryAttemptsByErrorID(ctx, errorID, maxItems)
	if listErr != nil {
		response.ErrorFrom(c, listErr)
		return
	}
	response.Success(c, attempts)
}
// UpdateErrorResolution lets an authenticated admin manually mark an error
// log as resolved or unresolved.
// PUT /api/v1/admin/ops/errors/:id/resolve
//
// The caller's user ID is passed to the service together with the requested
// state.
//
// Responds with 503 when the ops service is not wired, 401 when no valid auth
// subject is attached, 400 for an invalid error ID or body, and with the
// service's own error when monitoring is disabled or the update fails.
func (h *OpsHandler) UpdateErrorResolution(c *gin.Context) {
	if h.opsService == nil {
		response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
		return
	}

	ctx := c.Request.Context()
	if monitorErr := h.opsService.RequireMonitoringEnabled(ctx); monitorErr != nil {
		response.ErrorFrom(c, monitorErr)
		return
	}

	subject, authenticated := middleware.GetAuthSubjectFromContext(c)
	if !authenticated || subject.UserID <= 0 {
		response.Error(c, http.StatusUnauthorized, "Unauthorized")
		return
	}

	errorID, parseErr := strconv.ParseInt(strings.TrimSpace(c.Param("id")), 10, 64)
	if parseErr != nil || errorID <= 0 {
		response.BadRequest(c, "Invalid error id")
		return
	}

	var body opsResolveRequest
	if bindErr := c.ShouldBindJSON(&body); bindErr != nil {
		response.BadRequest(c, "Invalid request: "+bindErr.Error())
		return
	}

	// Copy the ID into a local so its address can be handed to the service.
	actorID := subject.UserID
	if updateErr := h.opsService.UpdateErrorResolution(ctx, errorID, body.Resolved, &actorID, nil); updateErr != nil {
		response.ErrorFrom(c, updateErr)
		return
	}
	response.Success(c, gin.H{"ok": true})
}
func parseOpsTimeRange(c *gin.Context, defaultRange string) (time.Time, time.Time, error) {
startStr := strings.TrimSpace(c.Query("start_time"))
endStr := strings.TrimSpace(c.Query("end_time"))
@@ -358,6 +459,10 @@ func parseOpsDuration(v string) (time.Duration, bool) {
return 6 * time.Hour, true
case "24h":
return 24 * time.Hour, true
case "7d":
return 7 * 24 * time.Hour, true
case "30d":
return 30 * 24 * time.Hour, true
default:
return 0, false
}

View File

@@ -832,28 +832,30 @@ func normalizeOpsErrorType(errType string, code string) string {
func classifyOpsPhase(errType, message, code string) string {
msg := strings.ToLower(message)
// Standardized phases: request|auth|routing|upstream|network|internal
// Map billing/concurrency/response => request; scheduling => routing.
switch strings.TrimSpace(code) {
case "INSUFFICIENT_BALANCE", "USAGE_LIMIT_EXCEEDED", "SUBSCRIPTION_NOT_FOUND", "SUBSCRIPTION_INVALID":
return "billing"
return "request"
}
switch errType {
case "authentication_error":
return "auth"
case "billing_error", "subscription_error":
return "billing"
return "request"
case "rate_limit_error":
if strings.Contains(msg, "concurrency") || strings.Contains(msg, "pending") || strings.Contains(msg, "queue") {
return "concurrency"
return "request"
}
return "upstream"
case "invalid_request_error":
return "response"
return "request"
case "upstream_error", "overloaded_error":
return "upstream"
case "api_error":
if strings.Contains(msg, "no available accounts") {
return "scheduling"
return "routing"
}
return "internal"
default:
@@ -914,34 +916,38 @@ func classifyOpsIsBusinessLimited(errType, phase, code string, status int, messa
}
// classifyOpsErrorOwner maps a standardized error phase to the party
// responsible for the error.
//
// Standardized owners: client|provider|platform.
//
//   - "upstream", "network" -> "provider"
//   - "request", "auth"     -> "client"
//   - "routing", "internal" -> "platform"
//
// For any other phase the message is inspected case-insensitively: if it
// mentions "upstream" the owner is "provider", otherwise "platform".
func classifyOpsErrorOwner(phase string, message string) string {
	switch phase {
	case "upstream", "network":
		return "provider"
	case "request", "auth":
		return "client"
	case "routing", "internal":
		return "platform"
	default:
		// Unknown phase: fall back to a message heuristic.
		if strings.Contains(strings.ToLower(message), "upstream") {
			return "provider"
		}
		return "platform"
	}
}
// classifyOpsErrorSource maps a standardized error phase to the component the
// error originated from.
//
// Standardized sources: client_request|upstream_http|gateway.
//
//   - "upstream"            -> "upstream_http"
//   - "network"             -> "gateway"
//   - "request", "auth"     -> "client_request"
//   - "routing", "internal" -> "gateway"
//
// For any other phase the message is inspected case-insensitively: if it
// mentions "upstream" the source is "upstream_http", otherwise "gateway".
func classifyOpsErrorSource(phase string, message string) string {
	switch phase {
	case "upstream":
		return "upstream_http"
	case "network":
		return "gateway"
	case "request", "auth":
		return "client_request"
	case "routing", "internal":
		return "gateway"
	default:
		// Unknown phase: fall back to a message heuristic.
		if strings.Contains(strings.ToLower(message), "upstream") {
			return "upstream_http"
		}
		return "gateway"
	}
}

View File

@@ -81,6 +81,9 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
ops.PUT("/alert-rules/:id", h.Admin.Ops.UpdateAlertRule)
ops.DELETE("/alert-rules/:id", h.Admin.Ops.DeleteAlertRule)
ops.GET("/alert-events", h.Admin.Ops.ListAlertEvents)
ops.GET("/alert-events/:id", h.Admin.Ops.GetAlertEvent)
ops.PUT("/alert-events/:id/status", h.Admin.Ops.UpdateAlertEventStatus)
ops.POST("/alert-silences", h.Admin.Ops.CreateAlertSilence)
// Email notification config (DB-backed)
ops.GET("/email-notification/config", h.Admin.Ops.GetEmailNotificationConfig)
@@ -113,7 +116,9 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
// Error logs (MVP-1)
ops.GET("/errors", h.Admin.Ops.GetErrorLogs)
ops.GET("/errors/:id", h.Admin.Ops.GetErrorLogByID)
ops.GET("/errors/:id/retries", h.Admin.Ops.ListRetryAttempts)
ops.POST("/errors/:id/retry", h.Admin.Ops.RetryErrorRequest)
ops.PUT("/errors/:id/resolve", h.Admin.Ops.UpdateErrorResolution)
// Request drilldown (success + error)
ops.GET("/requests", h.Admin.Ops.ListRequestDetails)