feat(handler): 新增ops管理接口和路由
- 添加告警静默管理接口 - 扩展错误日志查询和操作接口 - 新增重试和解决状态相关端点 - 完善错误日志记录功能
This commit is contained in:
@@ -7,8 +7,10 @@ import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/response"
|
||||
"github.com/Wei-Shaw/sub2api/internal/server/middleware"
|
||||
"github.com/Wei-Shaw/sub2api/internal/service"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/gin-gonic/gin/binding"
|
||||
@@ -372,8 +374,135 @@ func (h *OpsHandler) DeleteAlertRule(c *gin.Context) {
|
||||
response.Success(c, gin.H{"deleted": true})
|
||||
}
|
||||
|
||||
// GetAlertEvent returns a single ops alert event.
|
||||
// GET /api/v1/admin/ops/alert-events/:id
|
||||
func (h *OpsHandler) GetAlertEvent(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid event ID")
|
||||
return
|
||||
}
|
||||
|
||||
ev, err := h.opsService.GetAlertEventByID(c.Request.Context(), id)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, ev)
|
||||
}
|
||||
|
||||
// UpdateAlertEventStatus updates an ops alert event status.
|
||||
// PUT /api/v1/admin/ops/alert-events/:id/status
|
||||
func (h *OpsHandler) UpdateAlertEventStatus(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid event ID")
|
||||
return
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
Status string `json:"status"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&payload); err != nil {
|
||||
response.BadRequest(c, "Invalid request body")
|
||||
return
|
||||
}
|
||||
payload.Status = strings.TrimSpace(payload.Status)
|
||||
if payload.Status == "" {
|
||||
response.BadRequest(c, "Invalid status")
|
||||
return
|
||||
}
|
||||
if payload.Status != service.OpsAlertStatusResolved && payload.Status != service.OpsAlertStatusManualResolved {
|
||||
response.BadRequest(c, "Invalid status")
|
||||
return
|
||||
}
|
||||
|
||||
var resolvedAt *time.Time
|
||||
if payload.Status == service.OpsAlertStatusResolved || payload.Status == service.OpsAlertStatusManualResolved {
|
||||
now := time.Now().UTC()
|
||||
resolvedAt = &now
|
||||
}
|
||||
if err := h.opsService.UpdateAlertEventStatus(c.Request.Context(), id, payload.Status, resolvedAt); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, gin.H{"updated": true})
|
||||
}
|
||||
|
||||
// ListAlertEvents lists recent ops alert events.
|
||||
// GET /api/v1/admin/ops/alert-events
|
||||
// CreateAlertSilence creates a scoped silence for ops alerts.
|
||||
// POST /api/v1/admin/ops/alert-silences
|
||||
func (h *OpsHandler) CreateAlertSilence(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
RuleID int64 `json:"rule_id"`
|
||||
Platform string `json:"platform"`
|
||||
GroupID *int64 `json:"group_id"`
|
||||
Region *string `json:"region"`
|
||||
Until string `json:"until"`
|
||||
Reason string `json:"reason"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&payload); err != nil {
|
||||
response.BadRequest(c, "Invalid request body")
|
||||
return
|
||||
}
|
||||
until, err := time.Parse(time.RFC3339, strings.TrimSpace(payload.Until))
|
||||
if err != nil {
|
||||
response.BadRequest(c, "Invalid until")
|
||||
return
|
||||
}
|
||||
|
||||
createdBy := (*int64)(nil)
|
||||
if subject, ok := middleware.GetAuthSubjectFromContext(c); ok {
|
||||
uid := subject.UserID
|
||||
createdBy = &uid
|
||||
}
|
||||
|
||||
silence := &service.OpsAlertSilence{
|
||||
RuleID: payload.RuleID,
|
||||
Platform: strings.TrimSpace(payload.Platform),
|
||||
GroupID: payload.GroupID,
|
||||
Region: payload.Region,
|
||||
Until: until,
|
||||
Reason: strings.TrimSpace(payload.Reason),
|
||||
CreatedBy: createdBy,
|
||||
}
|
||||
|
||||
created, err := h.opsService.CreateAlertSilence(c.Request.Context(), silence)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, created)
|
||||
}
|
||||
|
||||
func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
@@ -384,7 +513,7 @@ func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
limit := 100
|
||||
limit := 20
|
||||
if raw := strings.TrimSpace(c.Query("limit")); raw != "" {
|
||||
n, err := strconv.Atoi(raw)
|
||||
if err != nil || n <= 0 {
|
||||
@@ -400,6 +529,43 @@ func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
|
||||
Severity: strings.TrimSpace(c.Query("severity")),
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(c.Query("email_sent")); v != "" {
|
||||
vv := strings.ToLower(v)
|
||||
switch vv {
|
||||
case "true", "1":
|
||||
b := true
|
||||
filter.EmailSent = &b
|
||||
case "false", "0":
|
||||
b := false
|
||||
filter.EmailSent = &b
|
||||
default:
|
||||
response.BadRequest(c, "Invalid email_sent")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Cursor pagination
|
||||
if rawTS := strings.TrimSpace(c.Query("before_fired_at")); rawTS != "" {
|
||||
ts, err := time.Parse(time.RFC3339Nano, rawTS)
|
||||
if err != nil {
|
||||
if t2, err2 := time.Parse(time.RFC3339, rawTS); err2 == nil {
|
||||
ts = t2
|
||||
} else {
|
||||
response.BadRequest(c, "Invalid before_fired_at")
|
||||
return
|
||||
}
|
||||
}
|
||||
filter.BeforeFiredAt = &ts
|
||||
}
|
||||
if rawID := strings.TrimSpace(c.Query("before_id")); rawID != "" {
|
||||
id, err := strconv.ParseInt(rawID, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid before_id")
|
||||
return
|
||||
}
|
||||
filter.BeforeID = &id
|
||||
}
|
||||
|
||||
// Optional global filter support (platform/group/time range).
|
||||
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
|
||||
filter.Platform = platform
|
||||
|
||||
@@ -80,6 +80,25 @@ func (h *OpsHandler) GetErrorLogs(c *gin.Context) {
|
||||
if phase := strings.TrimSpace(c.Query("phase")); phase != "" {
|
||||
filter.Phase = phase
|
||||
}
|
||||
if owner := strings.TrimSpace(c.Query("error_owner")); owner != "" {
|
||||
filter.Owner = owner
|
||||
}
|
||||
if source := strings.TrimSpace(c.Query("error_source")); source != "" {
|
||||
filter.Source = source
|
||||
}
|
||||
if v := strings.TrimSpace(c.Query("resolved")); v != "" {
|
||||
switch strings.ToLower(v) {
|
||||
case "1", "true", "yes":
|
||||
b := true
|
||||
filter.Resolved = &b
|
||||
case "0", "false", "no":
|
||||
b := false
|
||||
filter.Resolved = &b
|
||||
default:
|
||||
response.BadRequest(c, "Invalid resolved")
|
||||
return
|
||||
}
|
||||
}
|
||||
if q := strings.TrimSpace(c.Query("q")); q != "" {
|
||||
filter.Query = q
|
||||
}
|
||||
@@ -242,6 +261,11 @@ func (h *OpsHandler) ListRequestDetails(c *gin.Context) {
|
||||
type opsRetryRequest struct {
	// Mode is the retry mode requested by the caller ("mode" in JSON).
	Mode string `json:"mode"`

	// PinnedAccountID is optional; nil when "pinned_account_id" is absent or null.
	PinnedAccountID *int64 `json:"pinned_account_id"`

	// Force is the caller's "force" flag; false when omitted.
	Force bool `json:"force"`
}
|
||||
|
||||
type opsResolveRequest struct {
	// Resolved is the desired resolution state ("resolved" in JSON); false when omitted.
	Resolved bool `json:"resolved"`
}
|
||||
|
||||
// RetryErrorRequest retries a failed request using stored request_body.
|
||||
@@ -278,6 +302,8 @@ func (h *OpsHandler) RetryErrorRequest(c *gin.Context) {
|
||||
req.Mode = service.OpsRetryModeClient
|
||||
}
|
||||
|
||||
// Force flag is currently a UI-level acknowledgement. Server may still enforce safety constraints.
|
||||
_ = req.Force
|
||||
result, err := h.opsService.RetryError(c.Request.Context(), subject.UserID, id, req.Mode, req.PinnedAccountID)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
@@ -287,6 +313,81 @@ func (h *OpsHandler) RetryErrorRequest(c *gin.Context) {
|
||||
response.Success(c, result)
|
||||
}
|
||||
|
||||
// ListRetryAttempts lists retry attempts for an error log.
|
||||
// GET /api/v1/admin/ops/errors/:id/retries
|
||||
func (h *OpsHandler) ListRetryAttempts(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
idStr := strings.TrimSpace(c.Param("id"))
|
||||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid error id")
|
||||
return
|
||||
}
|
||||
|
||||
limit := 50
|
||||
if v := strings.TrimSpace(c.Query("limit")); v != "" {
|
||||
n, err := strconv.Atoi(v)
|
||||
if err != nil || n <= 0 {
|
||||
response.BadRequest(c, "Invalid limit")
|
||||
return
|
||||
}
|
||||
limit = n
|
||||
}
|
||||
|
||||
items, err := h.opsService.ListRetryAttemptsByErrorID(c.Request.Context(), id, limit)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, items)
|
||||
}
|
||||
|
||||
// UpdateErrorResolution allows manual resolve/unresolve.
|
||||
// PUT /api/v1/admin/ops/errors/:id/resolve
|
||||
func (h *OpsHandler) UpdateErrorResolution(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
subject, ok := middleware.GetAuthSubjectFromContext(c)
|
||||
if !ok || subject.UserID <= 0 {
|
||||
response.Error(c, http.StatusUnauthorized, "Unauthorized")
|
||||
return
|
||||
}
|
||||
|
||||
idStr := strings.TrimSpace(c.Param("id"))
|
||||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid error id")
|
||||
return
|
||||
}
|
||||
|
||||
var req opsResolveRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
response.BadRequest(c, "Invalid request: "+err.Error())
|
||||
return
|
||||
}
|
||||
uid := subject.UserID
|
||||
if err := h.opsService.UpdateErrorResolution(c.Request.Context(), id, req.Resolved, &uid, nil); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, gin.H{"ok": true})
|
||||
}
|
||||
|
||||
func parseOpsTimeRange(c *gin.Context, defaultRange string) (time.Time, time.Time, error) {
|
||||
startStr := strings.TrimSpace(c.Query("start_time"))
|
||||
endStr := strings.TrimSpace(c.Query("end_time"))
|
||||
@@ -358,6 +459,10 @@ func parseOpsDuration(v string) (time.Duration, bool) {
|
||||
return 6 * time.Hour, true
|
||||
case "24h":
|
||||
return 24 * time.Hour, true
|
||||
case "7d":
|
||||
return 7 * 24 * time.Hour, true
|
||||
case "30d":
|
||||
return 30 * 24 * time.Hour, true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
|
||||
@@ -832,28 +832,30 @@ func normalizeOpsErrorType(errType string, code string) string {
|
||||
|
||||
func classifyOpsPhase(errType, message, code string) string {
|
||||
msg := strings.ToLower(message)
|
||||
// Standardized phases: request|auth|routing|upstream|network|internal
|
||||
// Map billing/concurrency/response => request; scheduling => routing.
|
||||
switch strings.TrimSpace(code) {
|
||||
case "INSUFFICIENT_BALANCE", "USAGE_LIMIT_EXCEEDED", "SUBSCRIPTION_NOT_FOUND", "SUBSCRIPTION_INVALID":
|
||||
return "billing"
|
||||
return "request"
|
||||
}
|
||||
|
||||
switch errType {
|
||||
case "authentication_error":
|
||||
return "auth"
|
||||
case "billing_error", "subscription_error":
|
||||
return "billing"
|
||||
return "request"
|
||||
case "rate_limit_error":
|
||||
if strings.Contains(msg, "concurrency") || strings.Contains(msg, "pending") || strings.Contains(msg, "queue") {
|
||||
return "concurrency"
|
||||
return "request"
|
||||
}
|
||||
return "upstream"
|
||||
case "invalid_request_error":
|
||||
return "response"
|
||||
return "request"
|
||||
case "upstream_error", "overloaded_error":
|
||||
return "upstream"
|
||||
case "api_error":
|
||||
if strings.Contains(msg, "no available accounts") {
|
||||
return "scheduling"
|
||||
return "routing"
|
||||
}
|
||||
return "internal"
|
||||
default:
|
||||
@@ -914,34 +916,38 @@ func classifyOpsIsBusinessLimited(errType, phase, code string, status int, messa
|
||||
}
|
||||
|
||||
// classifyOpsErrorOwner maps an error phase to the party responsible for it.
//
// Standardized owners: client|provider|platform.
//   - "upstream", "network"  -> "provider"
//   - "request", "auth"      -> "client"
//   - "routing", "internal"  -> "platform"
//
// Any other phase falls back to the message: if it contains "upstream"
// (case-insensitive) the owner is "provider", otherwise "platform".
func classifyOpsErrorOwner(phase string, message string) string {
	switch phase {
	case "upstream", "network":
		return "provider"
	case "request", "auth":
		return "client"
	case "routing", "internal":
		return "platform"
	default:
		if strings.Contains(strings.ToLower(message), "upstream") {
			return "provider"
		}
		return "platform"
	}
}
|
||||
|
||||
// classifyOpsErrorSource maps an error phase to the component the error
// originated from.
//
// Standardized sources: client_request|upstream_http|gateway.
//   - "upstream"                        -> "upstream_http"
//   - "request", "auth"                 -> "client_request"
//   - "network", "routing", "internal"  -> "gateway"
//
// Any other phase falls back to the message: if it contains "upstream"
// (case-insensitive) the source is "upstream_http", otherwise "gateway".
func classifyOpsErrorSource(phase string, message string) string {
	switch phase {
	case "upstream":
		return "upstream_http"
	case "network":
		return "gateway"
	case "request", "auth":
		return "client_request"
	case "routing", "internal":
		return "gateway"
	default:
		if strings.Contains(strings.ToLower(message), "upstream") {
			return "upstream_http"
		}
		return "gateway"
	}
}
|
||||
|
||||
|
||||
@@ -81,6 +81,9 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
|
||||
ops.PUT("/alert-rules/:id", h.Admin.Ops.UpdateAlertRule)
|
||||
ops.DELETE("/alert-rules/:id", h.Admin.Ops.DeleteAlertRule)
|
||||
ops.GET("/alert-events", h.Admin.Ops.ListAlertEvents)
|
||||
ops.GET("/alert-events/:id", h.Admin.Ops.GetAlertEvent)
|
||||
ops.PUT("/alert-events/:id/status", h.Admin.Ops.UpdateAlertEventStatus)
|
||||
ops.POST("/alert-silences", h.Admin.Ops.CreateAlertSilence)
|
||||
|
||||
// Email notification config (DB-backed)
|
||||
ops.GET("/email-notification/config", h.Admin.Ops.GetEmailNotificationConfig)
|
||||
@@ -113,7 +116,9 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
|
||||
// Error logs (MVP-1)
|
||||
ops.GET("/errors", h.Admin.Ops.GetErrorLogs)
|
||||
ops.GET("/errors/:id", h.Admin.Ops.GetErrorLogByID)
|
||||
ops.GET("/errors/:id/retries", h.Admin.Ops.ListRetryAttempts)
|
||||
ops.POST("/errors/:id/retry", h.Admin.Ops.RetryErrorRequest)
|
||||
ops.PUT("/errors/:id/resolve", h.Admin.Ops.UpdateErrorResolution)
|
||||
|
||||
// Request drilldown (success + error)
|
||||
ops.GET("/requests", h.Admin.Ops.ListRequestDetails)
|
||||
|
||||
Reference in New Issue
Block a user