feat(service): 实现运维监控业务逻辑层

- 新增 ops 主服务（ops_service.go）和端口定义（ops_port.go） - 实现账号可用性检查服务（ops_account_availability.go） - 实现数据聚合服务（ops_aggregation_service.go） - 实现告警评估服务（ops_alert_evaluator_service.go） - 实现告警管理服务（ops_alerts.go） - 实现数据清理服务（ops_cleanup_service.go） - 实现并发控制服务（ops_concurrency.go） - 实现仪表板服务（ops_dashboard.go） - 实现错误处理服务（ops_errors.go） - 实现直方图服务（ops_histograms.go） - 实现指标采集服务（ops_metrics_collector.go） - 实现查询模式服务（ops_query_mode.go） - 实现实时监控服务（ops_realtime.go） - 实现请求详情服务（ops_request_details.go） - 实现重试机制服务（ops_retry.go） - 实现配置管理服务（ops_settings.go） - 实现趋势分析服务（ops_trends.go） - 实现窗口统计服务（ops_window_stats.go） - 添加 ops 相关领域常量 - 注册 service 依赖注入
2026-01-09 20:53:44 +08:00
parent bb5303272b
commit 5baa8b5673
21 changed files with 5244 additions and 0 deletions
--- a/backend/internal/service/ops_retry.go
+++ b/backend/internal/service/ops_retry.go
@@ -0,0 +1,635 @@
+package service
+
+import (
+	"bytes"
+	"context"
+	"database/sql"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"log"
+	"net/http"
+	"strings"
+	"time"
+
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
+	"github.com/gin-gonic/gin"
+	"github.com/lib/pq"
+)
+
+// Retry modes accepted by RetryError (matched after trimming and lower-casing).
+const (
+	// OpsRetryModeClient replays the request after reselecting an account for
+	// the original request's group.
+	OpsRetryModeClient   = "client"
+	// OpsRetryModeUpstream replays the request against one pinned account.
+	OpsRetryModeUpstream = "upstream"
+)
+
+// Status values written to a retry attempt record and reported in the result.
+const (
+	// opsRetryStatusRunning is the status an attempt is inserted with.
+	opsRetryStatusRunning   = "running"
+	// opsRetryStatusSucceeded means the replay returned no error and a status below 400.
+	opsRetryStatusSucceeded = "succeeded"
+	// opsRetryStatusFailed is the default outcome for anything else.
+	opsRetryStatusFailed    = "failed"
+)
+
+// Limits that bound a single retry.
+const (
+	// opsRetryTimeout is the deadline applied to the replayed request.
+	opsRetryTimeout             = 60 * time.Second
+	// opsRetryCaptureBytesLimit caps how many response bytes are buffered in memory.
+	opsRetryCaptureBytesLimit   = 64 * 1024
+	// opsRetryResponsePreviewMax caps the size of the response preview returned to the caller.
+	opsRetryResponsePreviewMax  = 8 * 1024
+	// opsRetryMinIntervalPerError is the minimum spacing between retries of the same error.
+	opsRetryMinIntervalPerError = 10 * time.Second
+	// opsRetryMaxAccountSwitches bounds how many account selections a client-mode retry may try.
+	opsRetryMaxAccountSwitches  = 3
+)
+
+// opsRetryRequestHeaderAllowlist lists the stored request headers (by lower-cased
+// name) that may be copied onto the replayed request; every other stored header
+// is dropped.
+var opsRetryRequestHeaderAllowlist = map[string]bool{
+	"anthropic-beta":    true,
+	"anthropic-version": true,
+}
+
+// opsRetryRequestType identifies which request family a logged request belongs
+// to, which in turn decides how it is parsed and forwarded on retry.
+type opsRetryRequestType string
+
+const (
+	// opsRetryTypeMessages is the fallback for paths that match neither of the other types.
+	opsRetryTypeMessages  opsRetryRequestType = "messages"
+	// opsRetryTypeOpenAI is used for request paths containing "/responses".
+	opsRetryTypeOpenAI    opsRetryRequestType = "openai_responses"
+	// opsRetryTypeGeminiV1B is used for request paths containing "/v1beta/".
+	opsRetryTypeGeminiV1B opsRetryRequestType = "gemini_v1beta"
+)
+
+// limitedResponseWriter is an in-memory response sink that keeps only the first
+// limit bytes of the body while still counting every byte it was handed.
+type limitedResponseWriter struct {
+	header      http.Header
+	status      int
+	wroteHeader bool
+
+	limit        int
+	totalWritten int64
+	buf          bytes.Buffer
+}
+
+// newLimitedResponseWriter builds a writer that retains up to limit body bytes.
+// A non-positive limit is replaced by 1.
+func newLimitedResponseWriter(limit int) *limitedResponseWriter {
+	w := &limitedResponseWriter{
+		header: http.Header{},
+		status: http.StatusOK,
+		limit:  1,
+	}
+	if limit > 0 {
+		w.limit = limit
+	}
+	return w
+}
+
+// Header returns the mutable response header map.
+func (w *limitedResponseWriter) Header() http.Header {
+	return w.header
+}
+
+// WriteHeader records the status code; only the first call has any effect.
+func (w *limitedResponseWriter) WriteHeader(statusCode int) {
+	if !w.wroteHeader {
+		w.status = statusCode
+		w.wroteHeader = true
+	}
+}
+
+// Write buffers as much of p as still fits under the limit and always reports
+// the full length as written, so callers never see a short write.
+func (w *limitedResponseWriter) Write(p []byte) (int, error) {
+	if !w.wroteHeader {
+		w.WriteHeader(http.StatusOK)
+	}
+	n := len(p)
+	w.totalWritten += int64(n)
+
+	if room := w.limit - w.buf.Len(); room > 0 {
+		keep := p
+		if n > room {
+			keep = p[:room]
+		}
+		_, _ = w.buf.Write(keep)
+	}
+
+	// Report success for the whole slice even when part of it was dropped.
+	return n, nil
+}
+
+// Flush is a no-op; everything is already held in memory.
+func (w *limitedResponseWriter) Flush() {}
+
+// bodyBytes returns the retained portion of the body.
+func (w *limitedResponseWriter) bodyBytes() []byte {
+	return w.buf.Bytes()
+}
+
+// truncated reports whether more bytes were written than the limit allows.
+func (w *limitedResponseWriter) truncated() bool {
+	return w.totalWritten > int64(w.limit)
+}
+
+// RetryError replays the request recorded for errorID and returns the outcome.
+//
+// mode must be "client" or "upstream" (case-insensitive, surrounding whitespace
+// ignored). In upstream mode the account is taken from pinnedAccountID when it
+// is positive, otherwise from the error log's account; if neither is available
+// the call is rejected.
+//
+// The call is rejected with an error when monitoring is disabled, the ops
+// repository is missing, the mode is invalid, another attempt for the same
+// error is running/queued or ended less than opsRetryMinIntervalPerError ago,
+// or the error log has no request body.
+//
+// Once the attempt record has been inserted the function returns a non-nil
+// result and a nil error even when the replay itself failed; inspect
+// result.Status and result.ErrorMessage for the outcome.
+func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, errorID int64, mode string, pinnedAccountID *int64) (*OpsRetryResult, error) {
+	if err := s.RequireMonitoringEnabled(ctx); err != nil {
+		return nil, err
+	}
+	if s.opsRepo == nil {
+		return nil, infraerrors.ServiceUnavailable("OPS_REPO_UNAVAILABLE", "Ops repository not available")
+	}
+
+	mode = strings.ToLower(strings.TrimSpace(mode))
+	switch mode {
+	case OpsRetryModeClient, OpsRetryModeUpstream:
+	default:
+		return nil, infraerrors.BadRequest("OPS_RETRY_INVALID_MODE", "mode must be client or upstream")
+	}
+
+	// A missing previous attempt (sql.ErrNoRows) is not an error here.
+	latest, err := s.opsRepo.GetLatestRetryAttemptForError(ctx, errorID)
+	if err != nil && !errors.Is(err, sql.ErrNoRows) {
+		return nil, infraerrors.InternalServer("OPS_RETRY_LOAD_LATEST_FAILED", "Failed to check retry status").WithCause(err)
+	}
+	if latest != nil {
+		if strings.EqualFold(latest.Status, opsRetryStatusRunning) || strings.EqualFold(latest.Status, "queued") {
+			return nil, infraerrors.Conflict("OPS_RETRY_IN_PROGRESS", "A retry is already in progress for this error")
+		}
+
+		// Throttle from the most specific timestamp available:
+		// finished time, then started time, then creation time.
+		lastAttemptAt := latest.CreatedAt
+		if latest.FinishedAt != nil && !latest.FinishedAt.IsZero() {
+			lastAttemptAt = *latest.FinishedAt
+		} else if latest.StartedAt != nil && !latest.StartedAt.IsZero() {
+			lastAttemptAt = *latest.StartedAt
+		}
+
+		if time.Since(lastAttemptAt) < opsRetryMinIntervalPerError {
+			return nil, infraerrors.Conflict("OPS_RETRY_TOO_FREQUENT", "Please wait before retrying this error again")
+		}
+	}
+
+	errorLog, err := s.GetErrorLogByID(ctx, errorID)
+	if err != nil {
+		return nil, err
+	}
+	if strings.TrimSpace(errorLog.RequestBody) == "" {
+		return nil, infraerrors.BadRequest("OPS_RETRY_NO_REQUEST_BODY", "No request body found to retry")
+	}
+
+	// Only upstream mode pins an account; client mode leaves pinned nil.
+	var pinned *int64
+	if mode == OpsRetryModeUpstream {
+		if pinnedAccountID != nil && *pinnedAccountID > 0 {
+			pinned = pinnedAccountID
+		} else if errorLog.AccountID != nil && *errorLog.AccountID > 0 {
+			pinned = errorLog.AccountID
+		} else {
+			return nil, infraerrors.BadRequest("OPS_RETRY_PINNED_ACCOUNT_REQUIRED", "pinned_account_id is required for upstream retry")
+		}
+	}
+
+	startedAt := time.Now()
+	attemptID, err := s.opsRepo.InsertRetryAttempt(ctx, &OpsInsertRetryAttemptInput{
+		RequestedByUserID: requestedByUserID,
+		SourceErrorID:     errorID,
+		Mode:              mode,
+		PinnedAccountID:   pinned,
+		Status:            opsRetryStatusRunning,
+		StartedAt:         startedAt,
+	})
+	if err != nil {
+		// SQLSTATE 23505 is PostgreSQL's unique_violation; it is reported as
+		// the same conflict as an in-progress retry.
+		// NOTE(review): presumably a unique index guards concurrent attempts
+		// per error — confirm against the schema.
+		var pqErr *pq.Error
+		if errors.As(err, &pqErr) && string(pqErr.Code) == "23505" {
+			return nil, infraerrors.Conflict("OPS_RETRY_IN_PROGRESS", "A retry is already in progress for this error")
+		}
+		return nil, infraerrors.InternalServer("OPS_RETRY_CREATE_ATTEMPT_FAILED", "Failed to create retry attempt").WithCause(err)
+	}
+
+	// Start from a failed result; it is overwritten with the execution outcome below.
+	result := &OpsRetryResult{
+		AttemptID:         attemptID,
+		Mode:              mode,
+		Status:            opsRetryStatusFailed,
+		PinnedAccountID:   pinned,
+		HTTPStatusCode:    0,
+		UpstreamRequestID: "",
+		ResponsePreview:   "",
+		ResponseTruncated: false,
+		ErrorMessage:      "",
+		StartedAt:         startedAt,
+	}
+
+	execCtx, cancel := context.WithTimeout(ctx, opsRetryTimeout)
+	defer cancel()
+
+	execRes := s.executeRetry(execCtx, errorLog, mode, pinned)
+
+	finishedAt := time.Now()
+	result.FinishedAt = finishedAt
+	result.DurationMs = finishedAt.Sub(startedAt).Milliseconds()
+
+	if execRes != nil {
+		result.Status = execRes.status
+		result.UsedAccountID = execRes.usedAccountID
+		result.HTTPStatusCode = execRes.httpStatusCode
+		result.UpstreamRequestID = execRes.upstreamRequestID
+		result.ResponsePreview = execRes.responsePreview
+		result.ResponseTruncated = execRes.responseTruncated
+		result.ErrorMessage = execRes.errorMessage
+	}
+
+	// Use a context detached from the request so the attempt row is still
+	// finalized when ctx has already been cancelled or has timed out.
+	updateCtx, updateCancel := context.WithTimeout(context.Background(), 3*time.Second)
+	defer updateCancel()
+
+	// Blank strings are passed to the repository as nil pointers.
+	var updateErrMsg *string
+	if strings.TrimSpace(result.ErrorMessage) != "" {
+		msg := result.ErrorMessage
+		updateErrMsg = &msg
+	}
+	var resultRequestID *string
+	if strings.TrimSpace(result.UpstreamRequestID) != "" {
+		v := result.UpstreamRequestID
+		resultRequestID = &v
+	}
+
+	finalStatus := result.Status
+	if strings.TrimSpace(finalStatus) == "" {
+		finalStatus = opsRetryStatusFailed
+	}
+
+	if err := s.opsRepo.UpdateRetryAttempt(updateCtx, &OpsUpdateRetryAttemptInput{
+		ID:              attemptID,
+		Status:          finalStatus,
+		FinishedAt:      finishedAt,
+		DurationMs:      result.DurationMs,
+		ResultRequestID: resultRequestID,
+		ErrorMessage:    updateErrMsg,
+	}); err != nil {
+		// Best-effort: retry itself already executed; do not fail the API response.
+		log.Printf("[Ops] UpdateRetryAttempt failed: %v", err)
+	}
+
+	return result, nil
+}
+
+// opsRetryExecution is the internal outcome of one replay; RetryError copies
+// its fields into the OpsRetryResult returned to the caller.
+type opsRetryExecution struct {
+	// status is one of the opsRetryStatus* values.
+	status string
+
+	// usedAccountID is the account the request was forwarded with, when one was used.
+	usedAccountID     *int64
+	// httpStatusCode is the status recorded by the capturing response writer.
+	httpStatusCode    int
+	// upstreamRequestID is the request id read from the captured response headers.
+	upstreamRequestID string
+
+	// responsePreview is the leading part of the captured response body.
+	responsePreview   string
+	// responseTruncated reports that the preview does not hold the whole body.
+	responseTruncated bool
+
+	// errorMessage describes the failure; it is empty on success.
+	errorMessage string
+}
+
+// executeRetry classifies the logged request, prepares its body, and hands it
+// to the client-mode or pinned-account executor. It never returns nil; invalid
+// input is reported as a failed execution with an explanatory message.
+func (s *OpsService) executeRetry(ctx context.Context, errorLog *OpsErrorLogDetail, mode string, pinnedAccountID *int64) *opsRetryExecution {
+	fail := func(msg string) *opsRetryExecution {
+		return &opsRetryExecution{
+			status:       opsRetryStatusFailed,
+			errorMessage: msg,
+		}
+	}
+
+	if errorLog == nil {
+		return fail("missing error log")
+	}
+
+	reqType := detectOpsRetryType(errorLog.RequestPath)
+	payload := []byte(errorLog.RequestBody)
+	// Only messages-type bodies are passed through the thinking-block filter;
+	// the other request types are replayed unchanged.
+	if reqType == opsRetryTypeMessages {
+		payload = FilterThinkingBlocksForRetry(payload)
+	}
+
+	normalized := strings.ToLower(strings.TrimSpace(mode))
+	if normalized == OpsRetryModeClient {
+		return s.executeClientRetry(ctx, reqType, errorLog, payload)
+	}
+	if normalized != OpsRetryModeUpstream {
+		return fail("invalid retry mode")
+	}
+	if pinnedAccountID == nil || *pinnedAccountID <= 0 {
+		return fail("pinned_account_id required for upstream retry")
+	}
+	return s.executePinnedRetry(ctx, reqType, errorLog, payload, *pinnedAccountID)
+}
+
+// detectOpsRetryType infers the request family from the logged request path.
+// The comparison is case-insensitive; "/responses" is checked before
+// "/v1beta/", and anything else is treated as a messages request.
+func detectOpsRetryType(path string) opsRetryRequestType {
+	normalized := strings.ToLower(strings.TrimSpace(path))
+	if strings.Contains(normalized, "/responses") {
+		return opsRetryTypeOpenAI
+	}
+	if strings.Contains(normalized, "/v1beta/") {
+		return opsRetryTypeGeminiV1B
+	}
+	return opsRetryTypeMessages
+}
+
+func (s *OpsService) executePinnedRetry(ctx context.Context, reqType opsRetryRequestType, errorLog *OpsErrorLogDetail, body []byte, pinnedAccountID int64) *opsRetryExecution {
+	if s.accountRepo == nil {
+		return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "account repository not available"}
+	}
+
+	account, err := s.accountRepo.GetByID(ctx, pinnedAccountID)
+	if err != nil {
+		return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: fmt.Sprintf("account not found: %v", err)}
+	}
+	if account == nil {
+		return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "account not found"}
+	}
+	if !account.IsSchedulable() {
+		return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "account is not schedulable"}
+	}
+	if errorLog.GroupID != nil && *errorLog.GroupID > 0 {
+		if !containsInt64(account.GroupIDs, *errorLog.GroupID) {
+			return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "pinned account is not in the same group as the original request"}
+		}
+	}
+
+	var release func()
+	if s.concurrencyService != nil {
+		acq, err := s.concurrencyService.AcquireAccountSlot(ctx, account.ID, account.Concurrency)
+		if err != nil {
+			return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: fmt.Sprintf("acquire account slot failed: %v", err)}
+		}
+		if acq == nil || !acq.Acquired {
+			return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "account concurrency limit reached"}
+		}
+		release = acq.ReleaseFunc
+	}
+	if release != nil {
+		defer release()
+	}
+
+	usedID := account.ID
+	exec := s.executeWithAccount(ctx, reqType, errorLog, body, account)
+	exec.usedAccountID = &usedID
+	if exec.status == "" {
+		exec.status = opsRetryStatusFailed
+	}
+	return exec
+}
+
+// executeClientRetry replays the request the way a client call would: it
+// selects an account for the original group and model, forwards the request,
+// and moves on to another account when the selection could not be acquired or
+// the failure is a failover error. At most opsRetryMaxAccountSwitches
+// selections are tried.
+func (s *OpsService) executeClientRetry(ctx context.Context, reqType opsRetryRequestType, errorLog *OpsErrorLogDetail, body []byte) *opsRetryExecution {
+	failed := func(msg string) *opsRetryExecution {
+		return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: msg}
+	}
+
+	groupID := errorLog.GroupID
+	if groupID == nil || *groupID <= 0 {
+		return failed("group_id missing; cannot reselect account")
+	}
+
+	// The stream flag is parsed alongside the model but is not needed here.
+	model, _, parseErr := extractRetryModelAndStream(reqType, errorLog, body)
+	if parseErr != nil {
+		return failed(parseErr.Error())
+	}
+
+	skipped := make(map[int64]struct{})
+	for attempt := 0; attempt < opsRetryMaxAccountSwitches; attempt++ {
+		picked, pickErr := s.selectAccountForRetry(ctx, reqType, groupID, model, skipped)
+		if pickErr != nil {
+			return failed(pickErr.Error())
+		}
+		if picked == nil || picked.Account == nil {
+			return failed("no available accounts")
+		}
+
+		account := picked.Account
+		if !picked.Acquired || picked.ReleaseFunc == nil {
+			// No slot was obtained for this account; exclude it and pick again.
+			skipped[account.ID] = struct{}{}
+			continue
+		}
+
+		// Run in a closure so the slot is released as soon as this attempt
+		// ends instead of when the whole loop returns.
+		outcome := func() *opsRetryExecution {
+			defer picked.ReleaseFunc()
+			return s.executeWithAccount(ctx, reqType, errorLog, body, account)
+		}()
+
+		if outcome == nil {
+			skipped[account.ID] = struct{}{}
+			continue
+		}
+		// If the gateway services ask for failover, try another account.
+		if outcome.status != opsRetryStatusSucceeded && s.isFailoverError(outcome.errorMessage) {
+			skipped[account.ID] = struct{}{}
+			continue
+		}
+
+		accountID := account.ID
+		outcome.usedAccountID = &accountID
+		return outcome
+	}
+
+	return failed("retry failed after exhausting account failovers")
+}
+
+// selectAccountForRetry asks the gateway matching reqType for an account for
+// the given group and model, skipping excludedIDs. OpenAI requests use the
+// OpenAI gateway service; messages and Gemini v1beta requests use the general
+// gateway service. An error is returned when the required service is missing
+// or the request type is not supported.
+func (s *OpsService) selectAccountForRetry(ctx context.Context, reqType opsRetryRequestType, groupID *int64, model string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
+	if reqType == opsRetryTypeOpenAI {
+		if s.openAIGatewayService == nil {
+			return nil, fmt.Errorf("openai gateway service not available")
+		}
+		return s.openAIGatewayService.SelectAccountWithLoadAwareness(ctx, groupID, "", model, excludedIDs)
+	}
+
+	if reqType != opsRetryTypeGeminiV1B && reqType != opsRetryTypeMessages {
+		return nil, fmt.Errorf("unsupported retry type: %s", reqType)
+	}
+
+	if s.gatewayService == nil {
+		return nil, fmt.Errorf("gateway service not available")
+	}
+	return s.gatewayService.SelectAccountWithLoadAwareness(ctx, groupID, "", model, excludedIDs)
+}
+
+// extractRetryModelAndStream returns the model name and stream flag for a
+// retry. Messages and OpenAI requests read them from the JSON body; Gemini
+// v1beta requests read them from the error log, since the body is not
+// consulted for that type. An error is returned when the body cannot be
+// parsed, the Gemini model is blank, or the request type is not supported.
+func extractRetryModelAndStream(reqType opsRetryRequestType, errorLog *OpsErrorLogDetail, body []byte) (model string, stream bool, err error) {
+	if reqType == opsRetryTypeMessages {
+		req, parseErr := ParseGatewayRequest(body)
+		if parseErr != nil {
+			return "", false, fmt.Errorf("failed to parse messages request body: %w", parseErr)
+		}
+		return req.Model, req.Stream, nil
+	}
+
+	if reqType == opsRetryTypeOpenAI {
+		var payload struct {
+			Model  string `json:"model"`
+			Stream bool   `json:"stream"`
+		}
+		if decodeErr := json.Unmarshal(body, &payload); decodeErr != nil {
+			return "", false, fmt.Errorf("failed to parse openai request body: %w", decodeErr)
+		}
+		return strings.TrimSpace(payload.Model), payload.Stream, nil
+	}
+
+	if reqType == opsRetryTypeGeminiV1B {
+		name := strings.TrimSpace(errorLog.Model)
+		if name == "" {
+			return "", false, fmt.Errorf("missing model for gemini v1beta retry")
+		}
+		return name, errorLog.Stream, nil
+	}
+
+	return "", false, fmt.Errorf("unsupported retry type: %s", reqType)
+}
+
+// executeWithAccount forwards the logged request through the gateway service
+// that matches reqType and the account's platform, writing the response into
+// an in-memory capture writer instead of a real client connection.
+//
+// The returned execution is never nil. It is marked succeeded only when the
+// forward call returned no error and the captured HTTP status is below 400;
+// otherwise errorMessage holds the forward error or the upstream status.
+func (s *OpsService) executeWithAccount(ctx context.Context, reqType opsRetryRequestType, errorLog *OpsErrorLogDetail, body []byte, account *Account) *opsRetryExecution {
+	if account == nil {
+		return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "missing account"}
+	}
+
+	c, w := newOpsRetryContext(ctx, errorLog)
+
+	var err error
+	switch reqType {
+	case opsRetryTypeOpenAI:
+		if s.openAIGatewayService == nil {
+			return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "openai gateway service not available"}
+		}
+		_, err = s.openAIGatewayService.Forward(ctx, c, account, body)
+	case opsRetryTypeGeminiV1B:
+		modelName := strings.TrimSpace(errorLog.Model)
+		action := "generateContent"
+		if errorLog.Stream {
+			action = "streamGenerateContent"
+		}
+		// Require only the service this account's platform actually uses, so a
+		// missing unrelated service does not fail the retry.
+		if account.Platform == PlatformAntigravity {
+			if s.antigravityGatewayService == nil {
+				return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "gemini services not available"}
+			}
+			_, err = s.antigravityGatewayService.ForwardGemini(ctx, c, account, modelName, action, errorLog.Stream, body)
+		} else {
+			if s.geminiCompatService == nil {
+				return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "gemini services not available"}
+			}
+			_, err = s.geminiCompatService.ForwardNative(ctx, c, account, modelName, action, errorLog.Stream, body)
+		}
+	case opsRetryTypeMessages:
+		switch account.Platform {
+		case PlatformAntigravity:
+			if s.antigravityGatewayService == nil {
+				return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "antigravity gateway service not available"}
+			}
+			_, err = s.antigravityGatewayService.Forward(ctx, c, account, body)
+		case PlatformGemini:
+			if s.geminiCompatService == nil {
+				return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "gemini gateway service not available"}
+			}
+			_, err = s.geminiCompatService.Forward(ctx, c, account, body)
+		default:
+			if s.gatewayService == nil {
+				return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "gateway service not available"}
+			}
+			parsedReq, parseErr := ParseGatewayRequest(body)
+			if parseErr != nil {
+				return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "failed to parse request body"}
+			}
+			_, err = s.gatewayService.Forward(ctx, c, account, parsedReq)
+		}
+	default:
+		return &opsRetryExecution{status: opsRetryStatusFailed, errorMessage: "unsupported retry type"}
+	}
+
+	// The status comes from the gin writer that wraps the capture writer.
+	statusCode := http.StatusOK
+	if c != nil && c.Writer != nil {
+		statusCode = c.Writer.Status()
+	}
+
+	upstreamReqID := extractUpstreamRequestID(c)
+	preview, truncated := extractResponsePreview(w)
+
+	exec := &opsRetryExecution{
+		status:            opsRetryStatusFailed,
+		httpStatusCode:    statusCode,
+		upstreamRequestID: upstreamReqID,
+		responsePreview:   preview,
+		responseTruncated: truncated,
+		errorMessage:      "",
+	}
+
+	if err == nil && statusCode < 400 {
+		exec.status = opsRetryStatusSucceeded
+		return exec
+	}
+
+	if err != nil {
+		exec.errorMessage = err.Error()
+	} else {
+		exec.errorMessage = fmt.Sprintf("upstream returned status %d", statusCode)
+	}
+
+	return exec
+}
+
+// newOpsRetryContext builds a synthetic gin context for replaying a logged
+// request, together with the capture writer that receives the response.
+//
+// The request is a POST to the logged path (or "/" when none is stored) with
+// a JSON content type, the logged user agent, and only those stored headers
+// named in opsRetryRequestHeaderAllowlist. The request carries an empty body;
+// callers pass the payload to the gateway services separately.
+func newOpsRetryContext(ctx context.Context, errorLog *OpsErrorLogDetail) (*gin.Context, *limitedResponseWriter) {
+	w := newLimitedResponseWriter(opsRetryCaptureBytesLimit)
+	c, _ := gin.CreateTestContext(w)
+
+	path := "/"
+	if errorLog != nil && strings.TrimSpace(errorLog.RequestPath) != "" {
+		path = errorLog.RequestPath
+	}
+
+	// NewRequestWithContext rejects a nil context.
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	req, err := http.NewRequestWithContext(ctx, http.MethodPost, "http://localhost"+path, bytes.NewReader(nil))
+	if err != nil {
+		// The stored path is free-form text and may not re-parse as a URL
+		// (for example a literal "%zz" or a control character). Fall back to
+		// the root path rather than dereferencing a nil request below.
+		req, _ = http.NewRequestWithContext(ctx, http.MethodPost, "http://localhost/", bytes.NewReader(nil))
+	}
+	req.Header.Set("content-type", "application/json")
+	if errorLog != nil && strings.TrimSpace(errorLog.UserAgent) != "" {
+		req.Header.Set("user-agent", errorLog.UserAgent)
+	}
+	// Restore a minimal, whitelisted subset of request headers to improve retry fidelity
+	// (e.g. anthropic-beta / anthropic-version). Never replay auth credentials.
+	if errorLog != nil && strings.TrimSpace(errorLog.RequestHeaders) != "" {
+		var stored map[string]string
+		// Stored headers that are not a valid JSON object are ignored.
+		if err := json.Unmarshal([]byte(errorLog.RequestHeaders), &stored); err == nil {
+			for k, v := range stored {
+				key := strings.TrimSpace(k)
+				if key == "" {
+					continue
+				}
+				if !opsRetryRequestHeaderAllowlist[strings.ToLower(key)] {
+					continue
+				}
+				val := strings.TrimSpace(v)
+				if val == "" {
+					continue
+				}
+				req.Header.Set(key, val)
+			}
+		}
+	}
+
+	c.Request = req
+	return c, w
+}
+
+// extractUpstreamRequestID returns the trimmed request-id header recorded on
+// the context's response writer, or "" when there is none.
+func extractUpstreamRequestID(c *gin.Context) string {
+	if c == nil || c.Writer == nil {
+		return ""
+	}
+	// http.Header.Get canonicalizes the key before the lookup, so this one
+	// call covers "x-request-id", "X-Request-Id" and "X-Request-ID" alike.
+	// Get on a nil header map simply returns "".
+	return strings.TrimSpace(c.Writer.Header().Get("X-Request-Id"))
+}
+
+// extractResponsePreview returns the whitespace-trimmed captured body, cut to
+// at most opsRetryResponsePreviewMax bytes, and whether the preview is
+// incomplete (cut here, or already truncated by the capture writer).
+//
+// When the body has to be cut, the cut point is moved back by up to three
+// bytes so it never falls inside a multi-byte UTF-8 sequence; the preview is
+// therefore still valid UTF-8 whenever the captured body was.
+func extractResponsePreview(w *limitedResponseWriter) (preview string, truncated bool) {
+	if w == nil {
+		return "", false
+	}
+	b := bytes.TrimSpace(w.bodyBytes())
+	if len(b) <= opsRetryResponsePreviewMax {
+		return string(b), w.truncated()
+	}
+
+	cut := opsRetryResponsePreviewMax
+	// Bytes of the form 10xxxxxx are UTF-8 continuation bytes. If the first
+	// dropped byte is one, the cut is mid-rune; step back to the rune start.
+	for back := 0; back < 3 && cut > 0 && b[cut]&0xC0 == 0x80; back++ {
+		cut--
+	}
+	return string(b[:cut]), true
+}
+
+// containsInt64 reports whether needle occurs in items. A nil or empty slice
+// never contains anything.
+func containsInt64(items []int64, needle int64) bool {
+	for i := 0; i < len(items); i++ {
+		if items[i] == needle {
+			return true
+		}
+	}
+	return false
+}
+
+// isFailoverError reports whether an execution error message indicates that
+// another account should be tried: after trimming and lower-casing, it must
+// contain both "upstream error:" and "failover".
+func (s *OpsService) isFailoverError(message string) bool {
+	normalized := strings.ToLower(strings.TrimSpace(message))
+	if !strings.Contains(normalized, "upstream error:") {
+		return false
+	}
+	return strings.Contains(normalized, "failover")
+}