From 7c4309ea240056b69bbcf33aec0583d185abe3bc Mon Sep 17 00:00:00 2001 From: IanShaw027 <131567472+IanShaw027@users.noreply.github.com> Date: Wed, 14 Jan 2026 14:29:01 +0800 Subject: [PATCH] =?UTF-8?q?feat(ops):=20=E6=B7=BB=E5=8A=A0ops=20handler?= =?UTF-8?q?=E5=92=8C=E8=B7=AF=E7=94=B1=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/internal/handler/admin/ops_handler.go | 354 ++++++++++++++++++ backend/internal/server/routes/admin.go | 15 +- 2 files changed, 368 insertions(+), 1 deletion(-) diff --git a/backend/internal/handler/admin/ops_handler.go b/backend/internal/handler/admin/ops_handler.go index ec7a8b75..630d5665 100644 --- a/backend/internal/handler/admin/ops_handler.go +++ b/backend/internal/handler/admin/ops_handler.go @@ -19,6 +19,29 @@ type OpsHandler struct { opsService *service.OpsService } +const ( + opsListViewErrors = "errors" + opsListViewExcluded = "excluded" + opsListViewAll = "all" +) + +func parseOpsViewParam(c *gin.Context) string { + if c == nil { + return "" + } + v := strings.ToLower(strings.TrimSpace(c.Query("view"))) + switch v { + case "", opsListViewErrors: + return opsListViewErrors + case opsListViewExcluded: + return opsListViewExcluded + case opsListViewAll: + return opsListViewAll + default: + return opsListViewErrors + } +} + func NewOpsHandler(opsService *service.OpsService) *OpsHandler { return &OpsHandler{opsService: opsService} } @@ -86,6 +109,7 @@ func (h *OpsHandler) GetErrorLogs(c *gin.Context) { if source := strings.TrimSpace(c.Query("error_source")); source != "" { filter.Source = source } + filter.View = parseOpsViewParam(c) if v := strings.TrimSpace(c.Query("resolved")); v != "" { switch strings.ToLower(v) { case "1", "true", "yes": @@ -157,6 +181,336 @@ func (h *OpsHandler) GetErrorLogByID(c *gin.Context) { response.Success(c, detail) } +// ==================== New split endpoints ==================== + +// ListRequestErrors lists client-visible request errors. +// GET /api/v1/admin/ops/request-errors +func (h *OpsHandler) ListRequestErrors(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + page, pageSize := response.ParsePagination(c) + if pageSize > 500 { + pageSize = 500 + } + startTime, endTime, err := parseOpsTimeRange(c, "1h") + if err != nil { + response.BadRequest(c, err.Error()) + return + } + + filter := &service.OpsErrorLogFilter{Page: page, PageSize: pageSize} + if !startTime.IsZero() { + filter.StartTime = &startTime + } + if !endTime.IsZero() { + filter.EndTime = &endTime + } + filter.View = parseOpsViewParam(c) + filter.Phase = strings.TrimSpace(c.Query("phase")) + filter.Owner = strings.TrimSpace(c.Query("error_owner")) + filter.Source = strings.TrimSpace(c.Query("error_source")) + filter.Query = strings.TrimSpace(c.Query("q")) + + // Force request errors: client-visible status >= 400. + // buildOpsErrorLogsWhere already applies this for non-upstream phase. + if strings.EqualFold(strings.TrimSpace(filter.Phase), "upstream") { + filter.Phase = "" + } + + if platform := strings.TrimSpace(c.Query("platform")); platform != "" { + filter.Platform = platform + } + if v := strings.TrimSpace(c.Query("group_id")); v != "" { + id, err := strconv.ParseInt(v, 10, 64) + if err != nil || id <= 0 { + response.BadRequest(c, "Invalid group_id") + return + } + filter.GroupID = &id + } + if v := strings.TrimSpace(c.Query("account_id")); v != "" { + id, err := strconv.ParseInt(v, 10, 64) + if err != nil || id <= 0 { + response.BadRequest(c, "Invalid account_id") + return + } + filter.AccountID = &id + } + + if v := strings.TrimSpace(c.Query("resolved")); v != "" { + switch strings.ToLower(v) { + case "1", "true", "yes": + b := true + filter.Resolved = &b + case "0", "false", "no": + b := false + filter.Resolved = &b + default: + response.BadRequest(c, "Invalid resolved") + return + } + } + if statusCodesStr := strings.TrimSpace(c.Query("status_codes")); statusCodesStr != "" { + parts := strings.Split(statusCodesStr, ",") + out := make([]int, 0, len(parts)) + for _, part := range parts { + p := strings.TrimSpace(part) + if p == "" { + continue + } + n, err := strconv.Atoi(p) + if err != nil || n < 0 { + response.BadRequest(c, "Invalid status_codes") + return + } + out = append(out, n) + } + filter.StatusCodes = out + } + + result, err := h.opsService.GetErrorLogs(c.Request.Context(), filter) + if err != nil { + response.ErrorFrom(c, err) + return + } + response.Paginated(c, result.Errors, int64(result.Total), result.Page, result.PageSize) +} + +// GetRequestError returns request error detail. +// GET /api/v1/admin/ops/request-errors/:id +func (h *OpsHandler) GetRequestError(c *gin.Context) { + // same storage; just proxy to existing detail + h.GetErrorLogByID(c) +} + +// RetryRequestErrorClient retries the client request based on stored request body. +// POST /api/v1/admin/ops/request-errors/:id/retry-client +func (h *OpsHandler) RetryRequestErrorClient(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + subject, ok := middleware.GetAuthSubjectFromContext(c) + if !ok || subject.UserID <= 0 { + response.Error(c, http.StatusUnauthorized, "Unauthorized") + return + } + + idStr := strings.TrimSpace(c.Param("id")) + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil || id <= 0 { + response.BadRequest(c, "Invalid error id") + return + } + + result, err := h.opsService.RetryError(c.Request.Context(), subject.UserID, id, service.OpsRetryModeClient, nil) + if err != nil { + response.ErrorFrom(c, err) + return + } + response.Success(c, result) +} + +// RetryRequestErrorUpstreamEvent retries a specific upstream attempt using captured upstream_request_body. +// POST /api/v1/admin/ops/request-errors/:id/upstream-errors/:idx/retry +func (h *OpsHandler) RetryRequestErrorUpstreamEvent(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + subject, ok := middleware.GetAuthSubjectFromContext(c) + if !ok || subject.UserID <= 0 { + response.Error(c, http.StatusUnauthorized, "Unauthorized") + return + } + + idStr := strings.TrimSpace(c.Param("id")) + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil || id <= 0 { + response.BadRequest(c, "Invalid error id") + return + } + + idxStr := strings.TrimSpace(c.Param("idx")) + idx, err := strconv.Atoi(idxStr) + if err != nil || idx < 0 { + response.BadRequest(c, "Invalid upstream idx") + return + } + + result, err := h.opsService.RetryUpstreamEvent(c.Request.Context(), subject.UserID, id, idx) + if err != nil { + response.ErrorFrom(c, err) + return + } + response.Success(c, result) +} + +// ResolveRequestError toggles resolved status. +// PUT /api/v1/admin/ops/request-errors/:id/resolve +func (h *OpsHandler) ResolveRequestError(c *gin.Context) { + h.UpdateErrorResolution(c) +} + +// ListUpstreamErrors lists independent upstream errors. +// GET /api/v1/admin/ops/upstream-errors +func (h *OpsHandler) ListUpstreamErrors(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + page, pageSize := response.ParsePagination(c) + if pageSize > 500 { + pageSize = 500 + } + startTime, endTime, err := parseOpsTimeRange(c, "1h") + if err != nil { + response.BadRequest(c, err.Error()) + return + } + + filter := &service.OpsErrorLogFilter{Page: page, PageSize: pageSize} + if !startTime.IsZero() { + filter.StartTime = &startTime + } + if !endTime.IsZero() { + filter.EndTime = &endTime + } + + filter.View = parseOpsViewParam(c) + filter.Phase = "upstream" + filter.Owner = "provider" + filter.Source = strings.TrimSpace(c.Query("error_source")) + filter.Query = strings.TrimSpace(c.Query("q")) + + if platform := strings.TrimSpace(c.Query("platform")); platform != "" { + filter.Platform = platform + } + if v := strings.TrimSpace(c.Query("group_id")); v != "" { + id, err := strconv.ParseInt(v, 10, 64) + if err != nil || id <= 0 { + response.BadRequest(c, "Invalid group_id") + return + } + filter.GroupID = &id + } + if v := strings.TrimSpace(c.Query("account_id")); v != "" { + id, err := strconv.ParseInt(v, 10, 64) + if err != nil || id <= 0 { + response.BadRequest(c, "Invalid account_id") + return + } + filter.AccountID = &id + } + + if v := strings.TrimSpace(c.Query("resolved")); v != "" { + switch strings.ToLower(v) { + case "1", "true", "yes": + b := true + filter.Resolved = &b + case "0", "false", "no": + b := false + filter.Resolved = &b + default: + response.BadRequest(c, "Invalid resolved") + return + } + } + if statusCodesStr := strings.TrimSpace(c.Query("status_codes")); statusCodesStr != "" { + parts := strings.Split(statusCodesStr, ",") + out := make([]int, 0, len(parts)) + for _, part := range parts { + p := strings.TrimSpace(part) + if p == "" { + continue + } + n, err := strconv.Atoi(p) + if err != nil || n < 0 { + response.BadRequest(c, "Invalid status_codes") + return + } + out = append(out, n) + } + filter.StatusCodes = out + } + + result, err := h.opsService.GetErrorLogs(c.Request.Context(), filter) + if err != nil { + response.ErrorFrom(c, err) + return + } + response.Paginated(c, result.Errors, int64(result.Total), result.Page, result.PageSize) +} + +// GetUpstreamError returns upstream error detail. +// GET /api/v1/admin/ops/upstream-errors/:id +func (h *OpsHandler) GetUpstreamError(c *gin.Context) { + h.GetErrorLogByID(c) +} + +// RetryUpstreamError retries upstream error using the original account_id. +// POST /api/v1/admin/ops/upstream-errors/:id/retry +func (h *OpsHandler) RetryUpstreamError(c *gin.Context) { + if h.opsService == nil { + response.Error(c, http.StatusServiceUnavailable, "Ops service not available") + return + } + if err := h.opsService.RequireMonitoringEnabled(c.Request.Context()); err != nil { + response.ErrorFrom(c, err) + return + } + + subject, ok := middleware.GetAuthSubjectFromContext(c) + if !ok || subject.UserID <= 0 { + response.Error(c, http.StatusUnauthorized, "Unauthorized") + return + } + + idStr := strings.TrimSpace(c.Param("id")) + id, err := strconv.ParseInt(idStr, 10, 64) + if err != nil || id <= 0 { + response.BadRequest(c, "Invalid error id") + return + } + + result, err := h.opsService.RetryError(c.Request.Context(), subject.UserID, id, service.OpsRetryModeUpstream, nil) + if err != nil { + response.ErrorFrom(c, err) + return + } + response.Success(c, result) +} + +// ResolveUpstreamError toggles resolved status. +// PUT /api/v1/admin/ops/upstream-errors/:id/resolve +func (h *OpsHandler) ResolveUpstreamError(c *gin.Context) { + h.UpdateErrorResolution(c) +} + +// ==================== Existing endpoints ==================== + // ListRequestDetails returns a request-level list (success + error) for drill-down. // GET /api/v1/admin/ops/requests func (h *OpsHandler) ListRequestDetails(c *gin.Context) { diff --git a/backend/internal/server/routes/admin.go b/backend/internal/server/routes/admin.go index adae7cdd..53702766 100644 --- a/backend/internal/server/routes/admin.go +++ b/backend/internal/server/routes/admin.go @@ -113,13 +113,26 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) { ws.GET("/qps", h.Admin.Ops.QPSWSHandler) } - // Error logs (MVP-1) + // Error logs (legacy) ops.GET("/errors", h.Admin.Ops.GetErrorLogs) ops.GET("/errors/:id", h.Admin.Ops.GetErrorLogByID) ops.GET("/errors/:id/retries", h.Admin.Ops.ListRetryAttempts) ops.POST("/errors/:id/retry", h.Admin.Ops.RetryErrorRequest) ops.PUT("/errors/:id/resolve", h.Admin.Ops.UpdateErrorResolution) + // Request errors (client-visible failures) + ops.GET("/request-errors", h.Admin.Ops.ListRequestErrors) + ops.GET("/request-errors/:id", h.Admin.Ops.GetRequestError) + ops.POST("/request-errors/:id/retry-client", h.Admin.Ops.RetryRequestErrorClient) + ops.POST("/request-errors/:id/upstream-errors/:idx/retry", h.Admin.Ops.RetryRequestErrorUpstreamEvent) + ops.PUT("/request-errors/:id/resolve", h.Admin.Ops.ResolveRequestError) + + // Upstream errors (independent upstream failures) + ops.GET("/upstream-errors", h.Admin.Ops.ListUpstreamErrors) + ops.GET("/upstream-errors/:id", h.Admin.Ops.GetUpstreamError) + ops.POST("/upstream-errors/:id/retry", h.Admin.Ops.RetryUpstreamError) + ops.PUT("/upstream-errors/:id/resolve", h.Admin.Ops.ResolveUpstreamError) + // Request drilldown (success + error) ops.GET("/requests", h.Admin.Ops.ListRequestDetails)