Merge pull request #285 from IanShaw027/fix/ops-bug
feat(ops): 增强错误日志管理、告警静默和前端 UI 优化
This commit is contained in:
@@ -7,8 +7,10 @@ import (
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/Wei-Shaw/sub2api/internal/pkg/response"
|
||||
"github.com/Wei-Shaw/sub2api/internal/server/middleware"
|
||||
"github.com/Wei-Shaw/sub2api/internal/service"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/gin-gonic/gin/binding"
|
||||
@@ -18,8 +20,6 @@ var validOpsAlertMetricTypes = []string{
|
||||
"success_rate",
|
||||
"error_rate",
|
||||
"upstream_error_rate",
|
||||
"p95_latency_ms",
|
||||
"p99_latency_ms",
|
||||
"cpu_usage_percent",
|
||||
"memory_usage_percent",
|
||||
"concurrency_queue_depth",
|
||||
@@ -372,8 +372,135 @@ func (h *OpsHandler) DeleteAlertRule(c *gin.Context) {
|
||||
response.Success(c, gin.H{"deleted": true})
|
||||
}
|
||||
|
||||
// GetAlertEvent returns a single ops alert event.
|
||||
// GET /api/v1/admin/ops/alert-events/:id
|
||||
func (h *OpsHandler) GetAlertEvent(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid event ID")
|
||||
return
|
||||
}
|
||||
|
||||
ev, err := h.opsService.GetAlertEventByID(c.Request.Context(), id)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, ev)
|
||||
}
|
||||
|
||||
// UpdateAlertEventStatus updates an ops alert event status.
|
||||
// PUT /api/v1/admin/ops/alert-events/:id/status
|
||||
func (h *OpsHandler) UpdateAlertEventStatus(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid event ID")
|
||||
return
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
Status string `json:"status"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&payload); err != nil {
|
||||
response.BadRequest(c, "Invalid request body")
|
||||
return
|
||||
}
|
||||
payload.Status = strings.TrimSpace(payload.Status)
|
||||
if payload.Status == "" {
|
||||
response.BadRequest(c, "Invalid status")
|
||||
return
|
||||
}
|
||||
if payload.Status != service.OpsAlertStatusResolved && payload.Status != service.OpsAlertStatusManualResolved {
|
||||
response.BadRequest(c, "Invalid status")
|
||||
return
|
||||
}
|
||||
|
||||
var resolvedAt *time.Time
|
||||
if payload.Status == service.OpsAlertStatusResolved || payload.Status == service.OpsAlertStatusManualResolved {
|
||||
now := time.Now().UTC()
|
||||
resolvedAt = &now
|
||||
}
|
||||
if err := h.opsService.UpdateAlertEventStatus(c.Request.Context(), id, payload.Status, resolvedAt); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, gin.H{"updated": true})
|
||||
}
|
||||
|
||||
// ListAlertEvents lists recent ops alert events.
|
||||
// GET /api/v1/admin/ops/alert-events
|
||||
// CreateAlertSilence creates a scoped silence for ops alerts.
|
||||
// POST /api/v1/admin/ops/alert-silences
|
||||
func (h *OpsHandler) CreateAlertSilence(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
RuleID int64 `json:"rule_id"`
|
||||
Platform string `json:"platform"`
|
||||
GroupID *int64 `json:"group_id"`
|
||||
Region *string `json:"region"`
|
||||
Until string `json:"until"`
|
||||
Reason string `json:"reason"`
|
||||
}
|
||||
if err := c.ShouldBindJSON(&payload); err != nil {
|
||||
response.BadRequest(c, "Invalid request body")
|
||||
return
|
||||
}
|
||||
until, err := time.Parse(time.RFC3339, strings.TrimSpace(payload.Until))
|
||||
if err != nil {
|
||||
response.BadRequest(c, "Invalid until")
|
||||
return
|
||||
}
|
||||
|
||||
createdBy := (*int64)(nil)
|
||||
if subject, ok := middleware.GetAuthSubjectFromContext(c); ok {
|
||||
uid := subject.UserID
|
||||
createdBy = &uid
|
||||
}
|
||||
|
||||
silence := &service.OpsAlertSilence{
|
||||
RuleID: payload.RuleID,
|
||||
Platform: strings.TrimSpace(payload.Platform),
|
||||
GroupID: payload.GroupID,
|
||||
Region: payload.Region,
|
||||
Until: until,
|
||||
Reason: strings.TrimSpace(payload.Reason),
|
||||
CreatedBy: createdBy,
|
||||
}
|
||||
|
||||
created, err := h.opsService.CreateAlertSilence(c.Request.Context(), silence)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, created)
|
||||
}
|
||||
|
||||
func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
@@ -384,7 +511,7 @@ func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
limit := 100
|
||||
limit := 20
|
||||
if raw := strings.TrimSpace(c.Query("limit")); raw != "" {
|
||||
n, err := strconv.Atoi(raw)
|
||||
if err != nil || n <= 0 {
|
||||
@@ -400,6 +527,49 @@ func (h *OpsHandler) ListAlertEvents(c *gin.Context) {
|
||||
Severity: strings.TrimSpace(c.Query("severity")),
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(c.Query("email_sent")); v != "" {
|
||||
vv := strings.ToLower(v)
|
||||
switch vv {
|
||||
case "true", "1":
|
||||
b := true
|
||||
filter.EmailSent = &b
|
||||
case "false", "0":
|
||||
b := false
|
||||
filter.EmailSent = &b
|
||||
default:
|
||||
response.BadRequest(c, "Invalid email_sent")
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
// Cursor pagination: both params must be provided together.
|
||||
rawTS := strings.TrimSpace(c.Query("before_fired_at"))
|
||||
rawID := strings.TrimSpace(c.Query("before_id"))
|
||||
if (rawTS == "") != (rawID == "") {
|
||||
response.BadRequest(c, "before_fired_at and before_id must be provided together")
|
||||
return
|
||||
}
|
||||
if rawTS != "" {
|
||||
ts, err := time.Parse(time.RFC3339Nano, rawTS)
|
||||
if err != nil {
|
||||
if t2, err2 := time.Parse(time.RFC3339, rawTS); err2 == nil {
|
||||
ts = t2
|
||||
} else {
|
||||
response.BadRequest(c, "Invalid before_fired_at")
|
||||
return
|
||||
}
|
||||
}
|
||||
filter.BeforeFiredAt = &ts
|
||||
}
|
||||
if rawID != "" {
|
||||
id, err := strconv.ParseInt(rawID, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid before_id")
|
||||
return
|
||||
}
|
||||
filter.BeforeID = &id
|
||||
}
|
||||
|
||||
// Optional global filter support (platform/group/time range).
|
||||
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
|
||||
filter.Platform = platform
|
||||
|
||||
@@ -19,6 +19,57 @@ type OpsHandler struct {
|
||||
opsService *service.OpsService
|
||||
}
|
||||
|
||||
// GetErrorLogByID returns ops error log detail.
|
||||
// GET /api/v1/admin/ops/errors/:id
|
||||
func (h *OpsHandler) GetErrorLogByID(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
idStr := strings.TrimSpace(c.Param("id"))
|
||||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid error id")
|
||||
return
|
||||
}
|
||||
|
||||
detail, err := h.opsService.GetErrorLogByID(c.Request.Context(), id)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
response.Success(c, detail)
|
||||
}
|
||||
|
||||
// Accepted values of the "view" query parameter on the error list endpoints.
const (
	// opsListViewErrors is also the fallback for a missing or unknown value.
	opsListViewErrors = "errors"
	// opsListViewExcluded selects the "excluded" view.
	opsListViewExcluded = "excluded"
	// opsListViewAll selects the "all" view.
	opsListViewAll = "all"
)
|
||||
|
||||
func parseOpsViewParam(c *gin.Context) string {
|
||||
if c == nil {
|
||||
return ""
|
||||
}
|
||||
v := strings.ToLower(strings.TrimSpace(c.Query("view")))
|
||||
switch v {
|
||||
case "", opsListViewErrors:
|
||||
return opsListViewErrors
|
||||
case opsListViewExcluded:
|
||||
return opsListViewExcluded
|
||||
case opsListViewAll:
|
||||
return opsListViewAll
|
||||
default:
|
||||
return opsListViewErrors
|
||||
}
|
||||
}
|
||||
|
||||
func NewOpsHandler(opsService *service.OpsService) *OpsHandler {
|
||||
return &OpsHandler{opsService: opsService}
|
||||
}
|
||||
@@ -47,16 +98,26 @@ func (h *OpsHandler) GetErrorLogs(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
filter := &service.OpsErrorLogFilter{
|
||||
Page: page,
|
||||
PageSize: pageSize,
|
||||
}
|
||||
filter := &service.OpsErrorLogFilter{Page: page, PageSize: pageSize}
|
||||
|
||||
if !startTime.IsZero() {
|
||||
filter.StartTime = &startTime
|
||||
}
|
||||
if !endTime.IsZero() {
|
||||
filter.EndTime = &endTime
|
||||
}
|
||||
filter.View = parseOpsViewParam(c)
|
||||
filter.Phase = strings.TrimSpace(c.Query("phase"))
|
||||
filter.Owner = strings.TrimSpace(c.Query("error_owner"))
|
||||
filter.Source = strings.TrimSpace(c.Query("error_source"))
|
||||
filter.Query = strings.TrimSpace(c.Query("q"))
|
||||
filter.UserQuery = strings.TrimSpace(c.Query("user_query"))
|
||||
|
||||
// Force request errors: client-visible status >= 400.
|
||||
// buildOpsErrorLogsWhere already applies this for non-upstream phase.
|
||||
if strings.EqualFold(strings.TrimSpace(filter.Phase), "upstream") {
|
||||
filter.Phase = ""
|
||||
}
|
||||
|
||||
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
|
||||
filter.Platform = platform
|
||||
@@ -77,11 +138,19 @@ func (h *OpsHandler) GetErrorLogs(c *gin.Context) {
|
||||
}
|
||||
filter.AccountID = &id
|
||||
}
|
||||
if phase := strings.TrimSpace(c.Query("phase")); phase != "" {
|
||||
filter.Phase = phase
|
||||
}
|
||||
if q := strings.TrimSpace(c.Query("q")); q != "" {
|
||||
filter.Query = q
|
||||
|
||||
if v := strings.TrimSpace(c.Query("resolved")); v != "" {
|
||||
switch strings.ToLower(v) {
|
||||
case "1", "true", "yes":
|
||||
b := true
|
||||
filter.Resolved = &b
|
||||
case "0", "false", "no":
|
||||
b := false
|
||||
filter.Resolved = &b
|
||||
default:
|
||||
response.BadRequest(c, "Invalid resolved")
|
||||
return
|
||||
}
|
||||
}
|
||||
if statusCodesStr := strings.TrimSpace(c.Query("status_codes")); statusCodesStr != "" {
|
||||
parts := strings.Split(statusCodesStr, ",")
|
||||
@@ -106,13 +175,120 @@ func (h *OpsHandler) GetErrorLogs(c *gin.Context) {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
response.Paginated(c, result.Errors, int64(result.Total), result.Page, result.PageSize)
|
||||
}
|
||||
|
||||
// GetErrorLogByID returns a single error log detail.
|
||||
// GET /api/v1/admin/ops/errors/:id
|
||||
func (h *OpsHandler) GetErrorLogByID(c *gin.Context) {
|
||||
// ListRequestErrors lists client-visible request errors.
|
||||
// GET /api/v1/admin/ops/request-errors
|
||||
func (h *OpsHandler) ListRequestErrors(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
page, pageSize := response.ParsePagination(c)
|
||||
if pageSize > 500 {
|
||||
pageSize = 500
|
||||
}
|
||||
startTime, endTime, err := parseOpsTimeRange(c, "1h")
|
||||
if err != nil {
|
||||
response.BadRequest(c, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
filter := &service.OpsErrorLogFilter{Page: page, PageSize: pageSize}
|
||||
if !startTime.IsZero() {
|
||||
filter.StartTime = &startTime
|
||||
}
|
||||
if !endTime.IsZero() {
|
||||
filter.EndTime = &endTime
|
||||
}
|
||||
filter.View = parseOpsViewParam(c)
|
||||
filter.Phase = strings.TrimSpace(c.Query("phase"))
|
||||
filter.Owner = strings.TrimSpace(c.Query("error_owner"))
|
||||
filter.Source = strings.TrimSpace(c.Query("error_source"))
|
||||
filter.Query = strings.TrimSpace(c.Query("q"))
|
||||
filter.UserQuery = strings.TrimSpace(c.Query("user_query"))
|
||||
|
||||
// Force request errors: client-visible status >= 400.
|
||||
// buildOpsErrorLogsWhere already applies this for non-upstream phase.
|
||||
if strings.EqualFold(strings.TrimSpace(filter.Phase), "upstream") {
|
||||
filter.Phase = ""
|
||||
}
|
||||
|
||||
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
|
||||
filter.Platform = platform
|
||||
}
|
||||
if v := strings.TrimSpace(c.Query("group_id")); v != "" {
|
||||
id, err := strconv.ParseInt(v, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid group_id")
|
||||
return
|
||||
}
|
||||
filter.GroupID = &id
|
||||
}
|
||||
if v := strings.TrimSpace(c.Query("account_id")); v != "" {
|
||||
id, err := strconv.ParseInt(v, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid account_id")
|
||||
return
|
||||
}
|
||||
filter.AccountID = &id
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(c.Query("resolved")); v != "" {
|
||||
switch strings.ToLower(v) {
|
||||
case "1", "true", "yes":
|
||||
b := true
|
||||
filter.Resolved = &b
|
||||
case "0", "false", "no":
|
||||
b := false
|
||||
filter.Resolved = &b
|
||||
default:
|
||||
response.BadRequest(c, "Invalid resolved")
|
||||
return
|
||||
}
|
||||
}
|
||||
if statusCodesStr := strings.TrimSpace(c.Query("status_codes")); statusCodesStr != "" {
|
||||
parts := strings.Split(statusCodesStr, ",")
|
||||
out := make([]int, 0, len(parts))
|
||||
for _, part := range parts {
|
||||
p := strings.TrimSpace(part)
|
||||
if p == "" {
|
||||
continue
|
||||
}
|
||||
n, err := strconv.Atoi(p)
|
||||
if err != nil || n < 0 {
|
||||
response.BadRequest(c, "Invalid status_codes")
|
||||
return
|
||||
}
|
||||
out = append(out, n)
|
||||
}
|
||||
filter.StatusCodes = out
|
||||
}
|
||||
|
||||
result, err := h.opsService.GetErrorLogs(c.Request.Context(), filter)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Paginated(c, result.Errors, int64(result.Total), result.Page, result.PageSize)
|
||||
}
|
||||
|
||||
// GetRequestError returns request error detail.
|
||||
// GET /api/v1/admin/ops/request-errors/:id
|
||||
func (h *OpsHandler) GetRequestError(c *gin.Context) {
|
||||
// same storage; just proxy to existing detail
|
||||
h.GetErrorLogByID(c)
|
||||
}
|
||||
|
||||
// ListRequestErrorUpstreamErrors lists upstream error logs correlated to a request error.
|
||||
// GET /api/v1/admin/ops/request-errors/:id/upstream-errors
|
||||
func (h *OpsHandler) ListRequestErrorUpstreamErrors(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
@@ -129,15 +305,306 @@ func (h *OpsHandler) GetErrorLogByID(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
// Load request error to get correlation keys.
|
||||
detail, err := h.opsService.GetErrorLogByID(c.Request.Context(), id)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
response.Success(c, detail)
|
||||
// Correlate by request_id/client_request_id.
|
||||
requestID := strings.TrimSpace(detail.RequestID)
|
||||
clientRequestID := strings.TrimSpace(detail.ClientRequestID)
|
||||
if requestID == "" && clientRequestID == "" {
|
||||
response.Paginated(c, []*service.OpsErrorLog{}, 0, 1, 10)
|
||||
return
|
||||
}
|
||||
|
||||
page, pageSize := response.ParsePagination(c)
|
||||
if pageSize > 500 {
|
||||
pageSize = 500
|
||||
}
|
||||
|
||||
// Keep correlation window wide enough so linked upstream errors
|
||||
// are discoverable even when UI defaults to 1h elsewhere.
|
||||
startTime, endTime, err := parseOpsTimeRange(c, "30d")
|
||||
if err != nil {
|
||||
response.BadRequest(c, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
filter := &service.OpsErrorLogFilter{Page: page, PageSize: pageSize}
|
||||
if !startTime.IsZero() {
|
||||
filter.StartTime = &startTime
|
||||
}
|
||||
if !endTime.IsZero() {
|
||||
filter.EndTime = &endTime
|
||||
}
|
||||
filter.View = "all"
|
||||
filter.Phase = "upstream"
|
||||
filter.Owner = "provider"
|
||||
filter.Source = strings.TrimSpace(c.Query("error_source"))
|
||||
filter.Query = strings.TrimSpace(c.Query("q"))
|
||||
|
||||
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
|
||||
filter.Platform = platform
|
||||
}
|
||||
|
||||
// Prefer exact match on request_id; if missing, fall back to client_request_id.
|
||||
if requestID != "" {
|
||||
filter.RequestID = requestID
|
||||
} else {
|
||||
filter.ClientRequestID = clientRequestID
|
||||
}
|
||||
|
||||
result, err := h.opsService.GetErrorLogs(c.Request.Context(), filter)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
// If client asks for details, expand each upstream error log to include upstream response fields.
|
||||
includeDetail := strings.TrimSpace(c.Query("include_detail"))
|
||||
if includeDetail == "1" || strings.EqualFold(includeDetail, "true") || strings.EqualFold(includeDetail, "yes") {
|
||||
details := make([]*service.OpsErrorLogDetail, 0, len(result.Errors))
|
||||
for _, item := range result.Errors {
|
||||
if item == nil {
|
||||
continue
|
||||
}
|
||||
d, err := h.opsService.GetErrorLogByID(c.Request.Context(), item.ID)
|
||||
if err != nil || d == nil {
|
||||
continue
|
||||
}
|
||||
details = append(details, d)
|
||||
}
|
||||
response.Paginated(c, details, int64(result.Total), result.Page, result.PageSize)
|
||||
return
|
||||
}
|
||||
|
||||
response.Paginated(c, result.Errors, int64(result.Total), result.Page, result.PageSize)
|
||||
}
|
||||
|
||||
// RetryRequestErrorClient retries the client request based on stored request body.
|
||||
// POST /api/v1/admin/ops/request-errors/:id/retry-client
|
||||
func (h *OpsHandler) RetryRequestErrorClient(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
subject, ok := middleware.GetAuthSubjectFromContext(c)
|
||||
if !ok || subject.UserID <= 0 {
|
||||
response.Error(c, http.StatusUnauthorized, "Unauthorized")
|
||||
return
|
||||
}
|
||||
|
||||
idStr := strings.TrimSpace(c.Param("id"))
|
||||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid error id")
|
||||
return
|
||||
}
|
||||
|
||||
result, err := h.opsService.RetryError(c.Request.Context(), subject.UserID, id, service.OpsRetryModeClient, nil)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, result)
|
||||
}
|
||||
|
||||
// RetryRequestErrorUpstreamEvent retries a specific upstream attempt using captured upstream_request_body.
|
||||
// POST /api/v1/admin/ops/request-errors/:id/upstream-errors/:idx/retry
|
||||
func (h *OpsHandler) RetryRequestErrorUpstreamEvent(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
subject, ok := middleware.GetAuthSubjectFromContext(c)
|
||||
if !ok || subject.UserID <= 0 {
|
||||
response.Error(c, http.StatusUnauthorized, "Unauthorized")
|
||||
return
|
||||
}
|
||||
|
||||
idStr := strings.TrimSpace(c.Param("id"))
|
||||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid error id")
|
||||
return
|
||||
}
|
||||
|
||||
idxStr := strings.TrimSpace(c.Param("idx"))
|
||||
idx, err := strconv.Atoi(idxStr)
|
||||
if err != nil || idx < 0 {
|
||||
response.BadRequest(c, "Invalid upstream idx")
|
||||
return
|
||||
}
|
||||
|
||||
result, err := h.opsService.RetryUpstreamEvent(c.Request.Context(), subject.UserID, id, idx)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, result)
|
||||
}
|
||||
|
||||
// ResolveRequestError toggles resolved status.
|
||||
// PUT /api/v1/admin/ops/request-errors/:id/resolve
|
||||
func (h *OpsHandler) ResolveRequestError(c *gin.Context) {
|
||||
h.UpdateErrorResolution(c)
|
||||
}
|
||||
|
||||
// ListUpstreamErrors lists independent upstream errors.
|
||||
// GET /api/v1/admin/ops/upstream-errors
|
||||
func (h *OpsHandler) ListUpstreamErrors(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
page, pageSize := response.ParsePagination(c)
|
||||
if pageSize > 500 {
|
||||
pageSize = 500
|
||||
}
|
||||
startTime, endTime, err := parseOpsTimeRange(c, "1h")
|
||||
if err != nil {
|
||||
response.BadRequest(c, err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
filter := &service.OpsErrorLogFilter{Page: page, PageSize: pageSize}
|
||||
if !startTime.IsZero() {
|
||||
filter.StartTime = &startTime
|
||||
}
|
||||
if !endTime.IsZero() {
|
||||
filter.EndTime = &endTime
|
||||
}
|
||||
|
||||
filter.View = parseOpsViewParam(c)
|
||||
filter.Phase = "upstream"
|
||||
filter.Owner = "provider"
|
||||
filter.Source = strings.TrimSpace(c.Query("error_source"))
|
||||
filter.Query = strings.TrimSpace(c.Query("q"))
|
||||
|
||||
if platform := strings.TrimSpace(c.Query("platform")); platform != "" {
|
||||
filter.Platform = platform
|
||||
}
|
||||
if v := strings.TrimSpace(c.Query("group_id")); v != "" {
|
||||
id, err := strconv.ParseInt(v, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid group_id")
|
||||
return
|
||||
}
|
||||
filter.GroupID = &id
|
||||
}
|
||||
if v := strings.TrimSpace(c.Query("account_id")); v != "" {
|
||||
id, err := strconv.ParseInt(v, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid account_id")
|
||||
return
|
||||
}
|
||||
filter.AccountID = &id
|
||||
}
|
||||
|
||||
if v := strings.TrimSpace(c.Query("resolved")); v != "" {
|
||||
switch strings.ToLower(v) {
|
||||
case "1", "true", "yes":
|
||||
b := true
|
||||
filter.Resolved = &b
|
||||
case "0", "false", "no":
|
||||
b := false
|
||||
filter.Resolved = &b
|
||||
default:
|
||||
response.BadRequest(c, "Invalid resolved")
|
||||
return
|
||||
}
|
||||
}
|
||||
if statusCodesStr := strings.TrimSpace(c.Query("status_codes")); statusCodesStr != "" {
|
||||
parts := strings.Split(statusCodesStr, ",")
|
||||
out := make([]int, 0, len(parts))
|
||||
for _, part := range parts {
|
||||
p := strings.TrimSpace(part)
|
||||
if p == "" {
|
||||
continue
|
||||
}
|
||||
n, err := strconv.Atoi(p)
|
||||
if err != nil || n < 0 {
|
||||
response.BadRequest(c, "Invalid status_codes")
|
||||
return
|
||||
}
|
||||
out = append(out, n)
|
||||
}
|
||||
filter.StatusCodes = out
|
||||
}
|
||||
|
||||
result, err := h.opsService.GetErrorLogs(c.Request.Context(), filter)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Paginated(c, result.Errors, int64(result.Total), result.Page, result.PageSize)
|
||||
}
|
||||
|
||||
// GetUpstreamError returns upstream error detail.
|
||||
// GET /api/v1/admin/ops/upstream-errors/:id
|
||||
func (h *OpsHandler) GetUpstreamError(c *gin.Context) {
|
||||
h.GetErrorLogByID(c)
|
||||
}
|
||||
|
||||
// RetryUpstreamError retries upstream error using the original account_id.
|
||||
// POST /api/v1/admin/ops/upstream-errors/:id/retry
|
||||
func (h *OpsHandler) RetryUpstreamError(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
subject, ok := middleware.GetAuthSubjectFromContext(c)
|
||||
if !ok || subject.UserID <= 0 {
|
||||
response.Error(c, http.StatusUnauthorized, "Unauthorized")
|
||||
return
|
||||
}
|
||||
|
||||
idStr := strings.TrimSpace(c.Param("id"))
|
||||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid error id")
|
||||
return
|
||||
}
|
||||
|
||||
result, err := h.opsService.RetryError(c.Request.Context(), subject.UserID, id, service.OpsRetryModeUpstream, nil)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, result)
|
||||
}
|
||||
|
||||
// ResolveUpstreamError toggles resolved status.
|
||||
// PUT /api/v1/admin/ops/upstream-errors/:id/resolve
|
||||
func (h *OpsHandler) ResolveUpstreamError(c *gin.Context) {
|
||||
h.UpdateErrorResolution(c)
|
||||
}
|
||||
|
||||
// ==================== Existing endpoints ====================
|
||||
|
||||
// ListRequestDetails returns a request-level list (success + error) for drill-down.
|
||||
// GET /api/v1/admin/ops/requests
|
||||
func (h *OpsHandler) ListRequestDetails(c *gin.Context) {
|
||||
@@ -242,6 +709,11 @@ func (h *OpsHandler) ListRequestDetails(c *gin.Context) {
|
||||
// opsRetryRequest is the JSON request body for retrying a stored error.
type opsRetryRequest struct {
	// Mode is the requested retry mode, decoded from "mode".
	Mode string `json:"mode"`
	// PinnedAccountID is optional and decoded from "pinned_account_id";
	// it stays nil when the field is absent.
	PinnedAccountID *int64 `json:"pinned_account_id"`
	// Force is decoded from "force".
	Force bool `json:"force"`
}
|
||||
|
||||
// opsResolveRequest is the JSON request body for marking an error as
// resolved or unresolved.
type opsResolveRequest struct {
	// Resolved is decoded from "resolved"; it is false when the field is absent.
	Resolved bool `json:"resolved"`
}
|
||||
|
||||
// RetryErrorRequest retries a failed request using stored request_body.
|
||||
@@ -278,6 +750,16 @@ func (h *OpsHandler) RetryErrorRequest(c *gin.Context) {
|
||||
req.Mode = service.OpsRetryModeClient
|
||||
}
|
||||
|
||||
// Force flag is currently a UI-level acknowledgement. Server may still enforce safety constraints.
|
||||
_ = req.Force
|
||||
|
||||
// Legacy endpoint safety: only allow retrying the client request here.
|
||||
// Upstream retries must go through the split endpoints.
|
||||
if strings.EqualFold(strings.TrimSpace(req.Mode), service.OpsRetryModeUpstream) {
|
||||
response.BadRequest(c, "upstream retry is not supported on this endpoint")
|
||||
return
|
||||
}
|
||||
|
||||
result, err := h.opsService.RetryError(c.Request.Context(), subject.UserID, id, req.Mode, req.PinnedAccountID)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
@@ -287,6 +769,81 @@ func (h *OpsHandler) RetryErrorRequest(c *gin.Context) {
|
||||
response.Success(c, result)
|
||||
}
|
||||
|
||||
// ListRetryAttempts lists retry attempts for an error log.
|
||||
// GET /api/v1/admin/ops/errors/:id/retries
|
||||
func (h *OpsHandler) ListRetryAttempts(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
idStr := strings.TrimSpace(c.Param("id"))
|
||||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid error id")
|
||||
return
|
||||
}
|
||||
|
||||
limit := 50
|
||||
if v := strings.TrimSpace(c.Query("limit")); v != "" {
|
||||
n, err := strconv.Atoi(v)
|
||||
if err != nil || n <= 0 {
|
||||
response.BadRequest(c, "Invalid limit")
|
||||
return
|
||||
}
|
||||
limit = n
|
||||
}
|
||||
|
||||
items, err := h.opsService.ListRetryAttemptsByErrorID(c.Request.Context(), id, limit)
|
||||
if err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, items)
|
||||
}
|
||||
|
||||
// UpdateErrorResolution allows manual resolve/unresolve.
|
||||
// PUT /api/v1/admin/ops/errors/:id/resolve
|
||||
func (h *OpsHandler) UpdateErrorResolution(c *gin.Context) {
|
||||
if h.opsService == nil {
|
||||
response.Error(c, http.StatusServiceUnavailable, "Ops service not available")
|
||||
return
|
||||
}
|
||||
if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
|
||||
subject, ok := middleware.GetAuthSubjectFromContext(c)
|
||||
if !ok || subject.UserID <= 0 {
|
||||
response.Error(c, http.StatusUnauthorized, "Unauthorized")
|
||||
return
|
||||
}
|
||||
|
||||
idStr := strings.TrimSpace(c.Param("id"))
|
||||
id, err := strconv.ParseInt(idStr, 10, 64)
|
||||
if err != nil || id <= 0 {
|
||||
response.BadRequest(c, "Invalid error id")
|
||||
return
|
||||
}
|
||||
|
||||
var req opsResolveRequest
|
||||
if err := c.ShouldBindJSON(&req); err != nil {
|
||||
response.BadRequest(c, "Invalid request: "+err.Error())
|
||||
return
|
||||
}
|
||||
uid := subject.UserID
|
||||
if err := h.opsService.UpdateErrorResolution(c.Request.Context(), id, req.Resolved, &uid, nil); err != nil {
|
||||
response.ErrorFrom(c, err)
|
||||
return
|
||||
}
|
||||
response.Success(c, gin.H{"ok": true})
|
||||
}
|
||||
|
||||
func parseOpsTimeRange(c *gin.Context, defaultRange string) (time.Time, time.Time, error) {
|
||||
startStr := strings.TrimSpace(c.Query("start_time"))
|
||||
endStr := strings.TrimSpace(c.Query("end_time"))
|
||||
@@ -358,6 +915,10 @@ func parseOpsDuration(v string) (time.Duration, bool) {
|
||||
return 6 * time.Hour, true
|
||||
case "24h":
|
||||
return 24 * time.Hour, true
|
||||
case "7d":
|
||||
return 7 * 24 * time.Hour, true
|
||||
case "30d":
|
||||
return 30 * 24 * time.Hour, true
|
||||
default:
|
||||
return 0, false
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user