From 021abfca181af4f6f52f594200d948de04070119 Mon Sep 17 00:00:00 2001 From: Rose Ding Date: Mon, 9 Feb 2026 17:25:36 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=8D=95=E8=B4=A6=E5=8F=B7=E5=88=86?= =?UTF-8?q?=E7=BB=84=E9=A6=96=E6=AC=A1=20503=20=E4=B8=8D=E8=AE=BE=E6=A8=A1?= =?UTF-8?q?=E5=9E=8B=E9=99=90=E6=B5=81=E6=A0=87=E8=AE=B0=EF=BC=8C=E9=81=BF?= =?UTF-8?q?=E5=85=8D=E5=90=8E=E7=BB=AD=E8=AF=B7=E6=B1=82=E9=9B=AA=E5=B4=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 单账号 antigravity 分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时, 原逻辑会设置 ~29s 模型限流标记。由于只有一个账号无法切换, 后续所有新请求在预检查时命中限流 → 几毫秒内直接返回 503, 导致约 30 秒的雪崩窗口。 修复:在 Handler 入口处检查分组是否只有单个 antigravity 账号, 如果是则提前设置 SingleAccountRetry context 标记,让 Service 层 首次 503 就走原地重试逻辑(不设限流标记),避免污染后续请求。 --- backend/internal/handler/gateway_handler.go | 14 ++++++++++++++ backend/internal/handler/gemini_v1beta_handler.go | 7 +++++++ backend/internal/service/gateway_service.go | 11 +++++++++++ 3 files changed, 32 insertions(+) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index 82181948..2b3703b4 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -238,6 +238,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) { var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + } + for { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制 if err != nil { @@ -409,6 +416,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) { } fallbackUsed := false + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), currentAPIKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + } + for { maxAccountSwitches := h.maxAccountSwitches switchCount := 0 diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go index 2b67cb1f..f8fb0dcb 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -327,6 +327,13 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { var lastFailoverErr *service.UpstreamFailoverError var forceCacheBilling bool // 粘性会话切换时的缓存计费标记 + // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。 + // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。 + if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) { + ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true) + c.Request = c.Request.WithContext(ctx) + } + for { selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs, "") // Gemini 不使用会话限制 if err != nil { diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 4e723232..2c04ae14 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -1683,6 +1683,17 @@ func (s *GatewayService) listSchedulableAccounts(ctx context.Context, groupID *i return accounts, useMixed, nil } +// IsSingleAntigravityAccountGroup 检查指定分组是否只有一个 antigravity 平台的可调度账号。 +// 用于 Handler 层在首次请求时提前设置 SingleAccountRetry context, +// 避免单账号分组收到 503 时错误地设置模型限流标记导致后续请求连续快速失败。 +func (s *GatewayService) IsSingleAntigravityAccountGroup(ctx context.Context, groupID *int64) bool { + accounts, _, err := s.listSchedulableAccounts(ctx, groupID, PlatformAntigravity, true) + if err != nil { + return false + } + return len(accounts) == 1 +} + func (s *GatewayService) isAccountAllowedForPlatform(account *Account, platform string, useMixed bool) bool { if account == nil { return false