diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go
index af20e318..415a38de 100644
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -240,6 +240,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
var lastFailoverErr *service.UpstreamFailoverError
var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
+ // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。
+ // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。
+ if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) {
+ ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+ c.Request = c.Request.WithContext(ctx)
+ }
+
for {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制
if err != nil {
@@ -248,6 +255,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
return
}
+ // Antigravity 单账号退避重试:分组内没有其他可用账号时,
+ // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。
+ // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。
+ if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
+ if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
+ log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
+ failedAccountIDs = make(map[int64]struct{})
+ // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换
+ ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+ c.Request = c.Request.WithContext(ctx)
+ continue
+ }
+ }
if lastFailoverErr != nil {
h.handleFailoverExhausted(c, lastFailoverErr, service.PlatformGemini, streamStarted)
} else {
@@ -397,6 +417,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
}
fallbackUsed := false
+ // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。
+ // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。
+ if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), currentAPIKey.GroupID) {
+ ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+ c.Request = c.Request.WithContext(ctx)
+ }
+
for {
maxAccountSwitches := h.maxAccountSwitches
switchCount := 0
@@ -414,6 +441,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
return
}
+ // Antigravity 单账号退避重试:分组内没有其他可用账号时,
+ // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。
+ // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。
+ if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
+ if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
+ log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
+ failedAccountIDs = make(map[int64]struct{})
+ // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换
+ ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+ c.Request = c.Request.WithContext(ctx)
+ continue
+ }
+ }
if lastFailoverErr != nil {
h.handleFailoverExhausted(c, lastFailoverErr, platform, streamStarted)
} else {
@@ -851,6 +891,27 @@ func sleepFailoverDelay(ctx context.Context, switchCount int) bool {
}
}
+// sleepAntigravitySingleAccountBackoff Antigravity 平台单账号分组的 503 退避重试延时。
+// 当分组内只有一个可用账号且上游返回 503(MODEL_CAPACITY_EXHAUSTED)时使用,
+// 采用短固定延时策略。Service 层在 SingleAccountRetry 模式下已经做了充分的原地重试
+// (最多 3 次、总等待 30s),所以 Handler 层的退避只需短暂等待即可。
+// 返回 false 表示 context 已取消。
+func sleepAntigravitySingleAccountBackoff(ctx context.Context, retryCount int) bool {
+ // 固定短延时:2s
+ // Service 层已经在原地等待了足够长的时间(retryDelay × 重试次数),
+ // Handler 层只需短暂间隔后重新进入 Service 层即可。
+ const delay = 2 * time.Second
+
+ log.Printf("Antigravity single-account 503 backoff: waiting %v before retry (attempt %d)", delay, retryCount)
+
+ select {
+ case <-ctx.Done():
+ return false
+ case <-time.After(delay):
+ return true
+ }
+}
+
func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) {
statusCode := failoverErr.StatusCode
responseBody := failoverErr.ResponseBody
diff --git a/backend/internal/handler/gateway_handler_single_account_retry_test.go b/backend/internal/handler/gateway_handler_single_account_retry_test.go
new file mode 100644
index 00000000..96aa14c6
--- /dev/null
+++ b/backend/internal/handler/gateway_handler_single_account_retry_test.go
@@ -0,0 +1,51 @@
+package handler
+
+import (
+ "context"
+ "testing"
+ "time"
+
+ "github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// sleepAntigravitySingleAccountBackoff 测试
+// ---------------------------------------------------------------------------
+
+func TestSleepAntigravitySingleAccountBackoff_ReturnsTrue(t *testing.T) {
+ ctx := context.Background()
+ start := time.Now()
+ ok := sleepAntigravitySingleAccountBackoff(ctx, 1)
+ elapsed := time.Since(start)
+
+ require.True(t, ok, "should return true when context is not canceled")
+ // 固定延迟 2s
+ require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond, "should wait approximately 2s")
+ require.Less(t, elapsed, 5*time.Second, "should not wait too long")
+}
+
+func TestSleepAntigravitySingleAccountBackoff_ContextCanceled(t *testing.T) {
+ ctx, cancel := context.WithCancel(context.Background())
+ cancel() // 立即取消
+
+ start := time.Now()
+ ok := sleepAntigravitySingleAccountBackoff(ctx, 1)
+ elapsed := time.Since(start)
+
+ require.False(t, ok, "should return false when context is canceled")
+ require.Less(t, elapsed, 500*time.Millisecond, "should return immediately on cancel")
+}
+
+func TestSleepAntigravitySingleAccountBackoff_FixedDelay(t *testing.T) {
+ // 验证不同 retryCount 都使用固定 2s 延迟
+ ctx := context.Background()
+
+ start := time.Now()
+ ok := sleepAntigravitySingleAccountBackoff(ctx, 5)
+ elapsed := time.Since(start)
+
+ require.True(t, ok)
+ // 即使 retryCount=5,延迟仍然是固定的 2s
+ require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond)
+ require.Less(t, elapsed, 5*time.Second)
+}
diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go
index d5149f22..f8fb0dcb 100644
--- a/backend/internal/handler/gemini_v1beta_handler.go
+++ b/backend/internal/handler/gemini_v1beta_handler.go
@@ -327,6 +327,13 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
var lastFailoverErr *service.UpstreamFailoverError
var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
+ // 单账号分组提前设置 SingleAccountRetry 标记,让 Service 层首次 503 就不设模型限流标记。
+ // 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流,导致后续请求连续快速失败。
+ if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) {
+ ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+ c.Request = c.Request.WithContext(ctx)
+ }
+
for {
selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs, "") // Gemini 不使用会话限制
if err != nil {
@@ -334,6 +341,19 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
return
}
+ // Antigravity 单账号退避重试:分组内没有其他可用账号时,
+ // 对 503 错误不直接返回,而是清除排除列表、等待退避后重试同一个账号。
+ // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。
+ if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
+ if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
+ log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
+ failedAccountIDs = make(map[int64]struct{})
+ // 设置 context 标记,让 Service 层预检查等待限流过期而非直接切换
+ ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+ c.Request = c.Request.WithContext(ctx)
+ continue
+ }
+ }
h.handleGeminiFailoverExhausted(c, lastFailoverErr)
return
}
diff --git a/backend/internal/pkg/antigravity/request_transformer.go b/backend/internal/pkg/antigravity/request_transformer.go
index 65f45cfc..e89a4c53 100644
--- a/backend/internal/pkg/antigravity/request_transformer.go
+++ b/backend/internal/pkg/antigravity/request_transformer.go
@@ -271,6 +271,21 @@ func filterOpenCodePrompt(text string) string {
return ""
}
+// systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表
+var systemBlockFilterPrefixes = []string{
+ "x-anthropic-billing-header",
+}
+
+// filterSystemBlockByPrefix 如果文本匹配过滤前缀,返回空字符串
+func filterSystemBlockByPrefix(text string) string {
+ for _, prefix := range systemBlockFilterPrefixes {
+ if strings.HasPrefix(text, prefix) {
+ return ""
+ }
+ }
+ return text
+}
+
// buildSystemInstruction 构建 systemInstruction(与 Antigravity-Manager 保持一致)
func buildSystemInstruction(system json.RawMessage, modelName string, opts TransformOptions, tools []ClaudeTool) *GeminiContent {
var parts []GeminiPart
@@ -287,8 +302,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans
if strings.Contains(sysStr, "You are Antigravity") {
userHasAntigravityIdentity = true
}
- // 过滤 OpenCode 默认提示词
- filtered := filterOpenCodePrompt(sysStr)
+ // 过滤 OpenCode 默认提示词和黑名单前缀
+ filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(sysStr))
if filtered != "" {
userSystemParts = append(userSystemParts, GeminiPart{Text: filtered})
}
@@ -302,8 +317,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans
if strings.Contains(block.Text, "You are Antigravity") {
userHasAntigravityIdentity = true
}
- // 过滤 OpenCode 默认提示词
- filtered := filterOpenCodePrompt(block.Text)
+ // 过滤 OpenCode 默认提示词和黑名单前缀
+ filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(block.Text))
if filtered != "" {
userSystemParts = append(userSystemParts, GeminiPart{Text: filtered})
}
diff --git a/backend/internal/pkg/ctxkey/ctxkey.go b/backend/internal/pkg/ctxkey/ctxkey.go
index 9bf563e7..0c4d82f7 100644
--- a/backend/internal/pkg/ctxkey/ctxkey.go
+++ b/backend/internal/pkg/ctxkey/ctxkey.go
@@ -28,4 +28,8 @@ const (
// IsMaxTokensOneHaikuRequest 标识当前请求是否为 max_tokens=1 + haiku 模型的探测请求
// 用于 ClaudeCodeOnly 验证绕过(绕过 system prompt 检查,但仍需验证 User-Agent)
IsMaxTokensOneHaikuRequest Key = "ctx_is_max_tokens_one_haiku"
+
+ // SingleAccountRetry 标识当前请求处于单账号 503 退避重试模式。
+ // 在此模式下,Service 层的模型限流预检查将等待限流过期而非直接切换账号。
+ SingleAccountRetry Key = "ctx_single_account_retry"
)
diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go
index 7abe4f3a..457dd964 100644
--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -20,6 +20,7 @@ import (
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
+ "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/tidwall/gjson"
@@ -47,6 +48,19 @@ const (
googleRPCTypeErrorInfo = "type.googleapis.com/google.rpc.ErrorInfo"
googleRPCReasonModelCapacityExhausted = "MODEL_CAPACITY_EXHAUSTED"
googleRPCReasonRateLimitExceeded = "RATE_LIMIT_EXCEEDED"
+
+ // 单账号 503 退避重试:Service 层原地重试的最大次数
+ // 在 handleSmartRetry 中,对于 shouldRateLimitModel(长延迟 ≥ 7s)的情况,
+ // 多账号模式下会设限流+切换账号;但单账号模式下改为原地等待+重试。
+ antigravitySingleAccountSmartRetryMaxAttempts = 3
+
+ // 单账号 503 退避重试:原地重试时单次最大等待时间
+ // 防止上游返回过长的 retryDelay 导致请求卡住太久
+ antigravitySingleAccountSmartRetryMaxWait = 15 * time.Second
+
+ // 单账号 503 退避重试:原地重试的总累计等待时间上限
+ // 超过此上限将不再重试,直接返回 503
+ antigravitySingleAccountSmartRetryTotalMaxWait = 30 * time.Second
)
// antigravityPassthroughErrorMessages 透传给客户端的错误消息白名单(小写)
@@ -149,6 +163,13 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
// 情况1: retryDelay >= 阈值,限流模型并切换账号
if shouldRateLimitModel {
+ // 单账号 503 退避重试模式:不设限流、不切换账号,改为原地等待+重试
+ // 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的,等几秒就能恢复。
+ // 多账号场景下切换账号是最优选择,但单账号场景下设限流毫无意义(只会导致双重等待)。
+ if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) {
+ return s.handleSingleAccountRetryInPlace(p, resp, respBody, baseURL, waitDuration, modelName)
+ }
+
rateLimitDuration := waitDuration
if rateLimitDuration <= 0 {
rateLimitDuration = antigravityDefaultRateLimitDuration
@@ -237,7 +258,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
}
}
- // 所有重试都失败,限流当前模型并切换账号
+ // 所有重试都失败
rateLimitDuration := waitDuration
if rateLimitDuration <= 0 {
rateLimitDuration = antigravityDefaultRateLimitDuration
@@ -246,6 +267,22 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
if retryBody == nil {
retryBody = respBody
}
+
+ // 单账号 503 退避重试模式:智能重试耗尽后不设限流、不切换账号,
+ // 直接返回 503 让 Handler 层的单账号退避循环做最终处理。
+ if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) {
+ log.Printf("%s status=%d smart_retry_exhausted_single_account attempts=%d model=%s account=%d body=%s (return 503 directly)",
+ p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200))
+ return &smartRetryResult{
+ action: smartRetryActionBreakWithResp,
+ resp: &http.Response{
+ StatusCode: resp.StatusCode,
+ Header: resp.Header.Clone(),
+ Body: io.NopCloser(bytes.NewReader(retryBody)),
+ },
+ }
+ }
+
log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)",
p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200))
@@ -280,17 +317,152 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
return &smartRetryResult{action: smartRetryActionContinue}
}
+// handleSingleAccountRetryInPlace 单账号 503 退避重试的原地重试逻辑。
+//
+// 在多账号场景下,收到 503 + 长 retryDelay(≥ 7s)时会设置模型限流 + 切换账号;
+// 但在单账号场景下,设限流毫无意义(因为切换回来的还是同一个账号,还要等限流过期)。
+// 此方法改为在 Service 层原地等待 + 重试,避免双重等待问题:
+//
+// 旧流程:Service 设限流 → Handler 退避等待 → Service 等限流过期 → 再请求(总耗时 = 退避 + 限流)
+// 新流程:Service 直接等 retryDelay → 重试 → 成功/再等 → 重试...(总耗时 ≈ 实际 retryDelay × 重试次数)
+//
+// 约束:
+// - 单次等待不超过 antigravitySingleAccountSmartRetryMaxWait
+// - 总累计等待不超过 antigravitySingleAccountSmartRetryTotalMaxWait
+// - 最多重试 antigravitySingleAccountSmartRetryMaxAttempts 次
+func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace(
+ p antigravityRetryLoopParams,
+ resp *http.Response,
+ respBody []byte,
+ baseURL string,
+ waitDuration time.Duration,
+ modelName string,
+) *smartRetryResult {
+ // 限制单次等待时间
+ if waitDuration > antigravitySingleAccountSmartRetryMaxWait {
+ waitDuration = antigravitySingleAccountSmartRetryMaxWait
+ }
+ if waitDuration < antigravitySmartRetryMinWait {
+ waitDuration = antigravitySmartRetryMinWait
+ }
+
+ log.Printf("%s status=%d single_account_503_retry_in_place model=%s account=%d upstream_retry_delay=%v (retrying in-place instead of rate-limiting)",
+ p.prefix, resp.StatusCode, modelName, p.account.ID, waitDuration)
+
+ var lastRetryResp *http.Response
+ var lastRetryBody []byte
+ totalWaited := time.Duration(0)
+
+ for attempt := 1; attempt <= antigravitySingleAccountSmartRetryMaxAttempts; attempt++ {
+ // 检查累计等待是否超限
+ if totalWaited+waitDuration > antigravitySingleAccountSmartRetryTotalMaxWait {
+ remaining := antigravitySingleAccountSmartRetryTotalMaxWait - totalWaited
+ if remaining <= 0 {
+ log.Printf("%s single_account_503_retry: total_wait_exceeded total=%v max=%v, giving up",
+ p.prefix, totalWaited, antigravitySingleAccountSmartRetryTotalMaxWait)
+ break
+ }
+ waitDuration = remaining
+ }
+
+ log.Printf("%s status=%d single_account_503_retry attempt=%d/%d delay=%v total_waited=%v model=%s account=%d",
+ p.prefix, resp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, waitDuration, totalWaited, modelName, p.account.ID)
+
+ select {
+ case <-p.ctx.Done():
+ log.Printf("%s status=context_canceled_during_single_account_retry", p.prefix)
+ return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
+ case <-time.After(waitDuration):
+ }
+ totalWaited += waitDuration
+
+ // 创建新请求
+ retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body)
+ if err != nil {
+ log.Printf("%s single_account_503_retry: request_build_failed error=%v", p.prefix, err)
+ break
+ }
+
+ retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
+ if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
+ log.Printf("%s status=%d single_account_503_retry_success attempt=%d/%d total_waited=%v",
+ p.prefix, retryResp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited)
+ // 关闭之前的响应
+ if lastRetryResp != nil {
+ _ = lastRetryResp.Body.Close()
+ }
+ return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
+ }
+
+ // 网络错误时继续重试
+ if retryErr != nil || retryResp == nil {
+ log.Printf("%s single_account_503_retry: network_error attempt=%d/%d error=%v",
+ p.prefix, attempt, antigravitySingleAccountSmartRetryMaxAttempts, retryErr)
+ continue
+ }
+
+ // 关闭之前的响应
+ if lastRetryResp != nil {
+ _ = lastRetryResp.Body.Close()
+ }
+ lastRetryResp = retryResp
+ lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
+ _ = retryResp.Body.Close()
+
+ // 解析新的重试信息,更新下次等待时间
+ if attempt < antigravitySingleAccountSmartRetryMaxAttempts && lastRetryBody != nil {
+ _, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody)
+ if newWaitDuration > 0 {
+ waitDuration = newWaitDuration
+ if waitDuration > antigravitySingleAccountSmartRetryMaxWait {
+ waitDuration = antigravitySingleAccountSmartRetryMaxWait
+ }
+ if waitDuration < antigravitySmartRetryMinWait {
+ waitDuration = antigravitySmartRetryMinWait
+ }
+ }
+ }
+ }
+
+ // 所有重试都失败,不设限流,直接返回 503
+ // Handler 层的单账号退避循环会做最终处理
+ retryBody := lastRetryBody
+ if retryBody == nil {
+ retryBody = respBody
+ }
+ log.Printf("%s status=%d single_account_503_retry_exhausted attempts=%d total_waited=%v model=%s account=%d body=%s (return 503 directly)",
+ p.prefix, resp.StatusCode, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited, modelName, p.account.ID, truncateForLog(retryBody, 200))
+
+ return &smartRetryResult{
+ action: smartRetryActionBreakWithResp,
+ resp: &http.Response{
+ StatusCode: resp.StatusCode,
+ Header: resp.Header.Clone(),
+ Body: io.NopCloser(bytes.NewReader(retryBody)),
+ },
+ }
+}
+
// antigravityRetryLoop 执行带 URL fallback 的重试循环
func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) {
// 预检查:如果账号已限流,直接返回切换信号
if p.requestedModel != "" {
if remaining := p.account.GetRateLimitRemainingTimeWithContext(p.ctx, p.requestedModel); remaining > 0 {
- log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d",
- p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
- return nil, &AntigravityAccountSwitchError{
- OriginalAccountID: p.account.ID,
- RateLimitedModel: p.requestedModel,
- IsStickySession: p.isStickySession,
+ // 单账号 503 退避重试模式:跳过限流预检查,直接发请求。
+ // 首次请求设的限流是为了多账号调度器跳过该账号,在单账号模式下无意义。
+ // 如果上游确实还不可用,handleSmartRetry → handleSingleAccountRetryInPlace
+ // 会在 Service 层原地等待+重试,不需要在预检查这里等。
+ if isSingleAccountRetry(p.ctx) {
+ log.Printf("%s pre_check: single_account_retry skipping rate_limit remaining=%v model=%s account=%d (will retry in-place if 503)",
+ p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
+ } else {
+ log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d",
+ p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
+ return nil, &AntigravityAccountSwitchError{
+ OriginalAccountID: p.account.ID,
+ RateLimitedModel: p.requestedModel,
+ IsStickySession: p.isStickySession,
+ }
}
}
}
@@ -372,12 +544,12 @@ urlFallbackLoop:
_ = resp.Body.Close()
// ★ 统一入口:自定义错误码 + 临时不可调度
- if handled, policyErr := s.applyErrorPolicy(p, resp.StatusCode, resp.Header, respBody); handled {
+ if handled, outStatus, policyErr := s.applyErrorPolicy(p, resp.StatusCode, resp.Header, respBody); handled {
if policyErr != nil {
return nil, policyErr
}
resp = &http.Response{
- StatusCode: resp.StatusCode,
+ StatusCode: outStatus,
Header: resp.Header.Clone(),
Body: io.NopCloser(bytes.NewReader(respBody)),
}
@@ -611,21 +783,22 @@ func (s *AntigravityGatewayService) checkErrorPolicy(ctx context.Context, accoun
return s.rateLimitService.CheckErrorPolicy(ctx, account, statusCode, body)
}
-// applyErrorPolicy 应用错误策略结果,返回是否应终止当前循环
-func (s *AntigravityGatewayService) applyErrorPolicy(p antigravityRetryLoopParams, statusCode int, headers http.Header, respBody []byte) (handled bool, retErr error) {
+// applyErrorPolicy 应用错误策略结果,返回是否应终止当前循环及应返回的状态码。
+// ErrorPolicySkipped 时 outStatus 为 500(前端约定:未命中的错误返回 500)。
+func (s *AntigravityGatewayService) applyErrorPolicy(p antigravityRetryLoopParams, statusCode int, headers http.Header, respBody []byte) (handled bool, outStatus int, retErr error) {
switch s.checkErrorPolicy(p.ctx, p.account, statusCode, respBody) {
case ErrorPolicySkipped:
- return true, nil
+ return true, http.StatusInternalServerError, nil
case ErrorPolicyMatched:
_ = p.handleError(p.ctx, p.prefix, p.account, statusCode, headers, respBody,
p.requestedModel, p.groupID, p.sessionHash, p.isStickySession)
- return true, nil
+ return true, statusCode, nil
case ErrorPolicyTempUnscheduled:
slog.Info("temp_unschedulable_matched",
"prefix", p.prefix, "status_code", statusCode, "account_id", p.account.ID)
- return true, &AntigravityAccountSwitchError{OriginalAccountID: p.account.ID, IsStickySession: p.isStickySession}
+ return true, statusCode, &AntigravityAccountSwitchError{OriginalAccountID: p.account.ID, IsStickySession: p.isStickySession}
}
- return false, nil
+ return false, statusCode, nil
}
// mapAntigravityModel 获取映射后的模型名
@@ -1940,6 +2113,12 @@ func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool {
}
}
+// isSingleAccountRetry 检查 context 中是否设置了单账号退避重试标记
+func isSingleAccountRetry(ctx context.Context) bool {
+ v, _ := ctx.Value(ctxkey.SingleAccountRetry).(bool)
+ return v
+}
+
// setModelRateLimitByModelName 使用官方模型 ID 设置模型级限流
// 直接使用上游返回的模型 ID(如 claude-sonnet-4-5)作为限流 key
// 返回是否已成功设置(若模型名为空或 repo 为 nil 将返回 false)
@@ -2239,6 +2418,10 @@ func (s *AntigravityGatewayService) handleUpstreamError(
requestedModel string,
groupID int64, sessionHash string, isStickySession bool,
) *handleModelRateLimitResult {
+ // 遵守自定义错误码策略:未命中则跳过所有限流处理
+ if !account.ShouldHandleErrorCode(statusCode) {
+ return nil
+ }
// 模型级限流处理(优先)
result := s.handleModelRateLimit(&handleModelRateLimitParams{
ctx: ctx,
diff --git a/backend/internal/service/antigravity_single_account_retry_test.go b/backend/internal/service/antigravity_single_account_retry_test.go
new file mode 100644
index 00000000..d5813553
--- /dev/null
+++ b/backend/internal/service/antigravity_single_account_retry_test.go
@@ -0,0 +1,902 @@
+//go:build unit
+
+package service
+
+import (
+ "bytes"
+ "context"
+ "io"
+ "net/http"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
+ "github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// 辅助函数:构造带 SingleAccountRetry 标记的 context
+// ---------------------------------------------------------------------------
+
+func ctxWithSingleAccountRetry() context.Context {
+ return context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true)
+}
+
+// ---------------------------------------------------------------------------
+// 1. isSingleAccountRetry 测试
+// ---------------------------------------------------------------------------
+
+func TestIsSingleAccountRetry_True(t *testing.T) {
+ ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true)
+ require.True(t, isSingleAccountRetry(ctx))
+}
+
+func TestIsSingleAccountRetry_False_NoValue(t *testing.T) {
+ require.False(t, isSingleAccountRetry(context.Background()))
+}
+
+func TestIsSingleAccountRetry_False_ExplicitFalse(t *testing.T) {
+ ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, false)
+ require.False(t, isSingleAccountRetry(ctx))
+}
+
+func TestIsSingleAccountRetry_False_WrongType(t *testing.T) {
+ ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, "true")
+ require.False(t, isSingleAccountRetry(ctx))
+}
+
+// ---------------------------------------------------------------------------
+// 2. 常量验证
+// ---------------------------------------------------------------------------
+
+func TestSingleAccountRetryConstants(t *testing.T) {
+ require.Equal(t, 3, antigravitySingleAccountSmartRetryMaxAttempts,
+ "单账号原地重试最多 3 次")
+ require.Equal(t, 15*time.Second, antigravitySingleAccountSmartRetryMaxWait,
+ "单次最大等待 15s")
+ require.Equal(t, 30*time.Second, antigravitySingleAccountSmartRetryTotalMaxWait,
+ "总累计等待不超过 30s")
+}
+
+// ---------------------------------------------------------------------------
+// 3. handleSmartRetry + 503 + SingleAccountRetry → 走 handleSingleAccountRetryInPlace
+// (而非设模型限流 + 切换账号)
+// ---------------------------------------------------------------------------
+
+// TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace
+// 核心场景:503 + retryDelay >= 7s + SingleAccountRetry 标记
+// → 不设模型限流、不切换账号,改为原地重试
+func TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace(t *testing.T) {
+ // 原地重试成功
+ successResp := &http.Response{
+ StatusCode: http.StatusOK,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+ }
+ upstream := &mockSmartRetryUpstream{
+ responses: []*http.Response{successResp},
+ errors: []error{nil},
+ }
+
+ repo := &stubAntigravityAccountRepo{}
+ account := &Account{
+ ID: 1,
+ Name: "acc-single",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Concurrency: 1,
+ }
+
+ // 503 + 39s >= 7s 阈值 + MODEL_CAPACITY_EXHAUSTED
+ respBody := []byte(`{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
+ ],
+ "message": "No capacity available for model gemini-3-pro-high on the server"
+ }
+ }`)
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(bytes.NewReader(respBody)),
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(), // 关键:设置单账号标记
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ accountRepo: repo,
+ handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+ return nil
+ },
+ }
+
+ availableURLs := []string{"https://ag-1.test"}
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ // 关键断言:返回 resp(原地重试成功),而非 switchError(切换账号)
+ require.NotNil(t, result.resp, "should return successful response from in-place retry")
+ require.Equal(t, http.StatusOK, result.resp.StatusCode)
+ require.Nil(t, result.switchError, "should NOT return switchError in single account mode")
+ require.Nil(t, result.err)
+
+ // 验证未设模型限流(单账号模式不应设限流)
+ require.Len(t, repo.modelRateLimitCalls, 0,
+ "should NOT set model rate limit in single account retry mode")
+
+ // 验证确实调用了 upstream(原地重试)
+ require.GreaterOrEqual(t, len(upstream.calls), 1, "should have made at least one retry call")
+}
+
+// TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches
+// 对照组:503 + retryDelay >= 7s + 无 SingleAccountRetry 标记
+// → 照常设模型限流 + 切换账号
+func TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches(t *testing.T) {
+ repo := &stubAntigravityAccountRepo{}
+ account := &Account{
+ ID: 2,
+ Name: "acc-multi",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ }
+
+ // 503 + 39s >= 7s 阈值
+ respBody := []byte(`{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
+ ]
+ }
+ }`)
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(bytes.NewReader(respBody)),
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: context.Background(), // 关键:无单账号标记
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ accountRepo: repo,
+ handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+ return nil
+ },
+ }
+
+ availableURLs := []string{"https://ag-1.test"}
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ // 对照:多账号模式返回 switchError
+ require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503")
+ require.Nil(t, result.resp, "should not return resp when switchError is set")
+
+ // 对照:多账号模式应设模型限流
+ require.Len(t, repo.modelRateLimitCalls, 1,
+ "multi-account mode SHOULD set model rate limit")
+}
+
+// TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches
+// 边界情况:429(非 503)+ SingleAccountRetry 标记
+// → 单账号原地重试仅针对 503,429 依然走切换账号逻辑
+func TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches(t *testing.T) {
+ repo := &stubAntigravityAccountRepo{}
+ account := &Account{
+ ID: 3,
+ Name: "acc-429",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ }
+
+ // 429 + 15s >= 7s 阈值
+ respBody := []byte(`{
+ "error": {
+ "status": "RESOURCE_EXHAUSTED",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "15s"}
+ ]
+ }
+ }`)
+ resp := &http.Response{
+ StatusCode: http.StatusTooManyRequests, // 429,不是 503
+ Header: http.Header{},
+ Body: io.NopCloser(bytes.NewReader(respBody)),
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(), // 有单账号标记
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ accountRepo: repo,
+ handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+ return nil
+ },
+ }
+
+ availableURLs := []string{"https://ag-1.test"}
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ // 429 即使有单账号标记,也应走切换账号
+ require.NotNil(t, result.switchError, "429 should still return switchError even with SingleAccountRetry")
+ require.Len(t, repo.modelRateLimitCalls, 1,
+ "429 should still set model rate limit even with SingleAccountRetry")
+}
+
+// ---------------------------------------------------------------------------
+// 4. handleSmartRetry + 503 + 短延迟 + SingleAccountRetry → 智能重试耗尽后不设限流
+// ---------------------------------------------------------------------------
+
+// TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit
+// 503 + retryDelay < 7s + SingleAccountRetry → 智能重试耗尽后直接返回 503,不设限流
+func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testing.T) {
+ // 智能重试也返回 503
+ failRespBody := `{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+ ]
+ }
+ }`
+ failResp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(failRespBody)),
+ }
+ upstream := &mockSmartRetryUpstream{
+ responses: []*http.Response{failResp},
+ errors: []error{nil},
+ }
+
+ repo := &stubAntigravityAccountRepo{}
+ account := &Account{
+ ID: 4,
+ Name: "acc-short-503",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ }
+
+ // 0.1s < 7s 阈值
+ respBody := []byte(`{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+ ]
+ }
+ }`)
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(bytes.NewReader(respBody)),
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(),
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ accountRepo: repo,
+ handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+ return nil
+ },
+ }
+
+ availableURLs := []string{"https://ag-1.test"}
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ // 关键断言:单账号 503 模式下,智能重试耗尽后直接返回 503 响应,不切换
+ require.NotNil(t, result.resp, "should return 503 response directly for single account mode")
+ require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode)
+ require.Nil(t, result.switchError, "should NOT switch account in single account mode")
+
+ // 关键断言:不设模型限流
+ require.Len(t, repo.modelRateLimitCalls, 0,
+ "should NOT set model rate limit for 503 in single account mode")
+}
+
+// TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit
+// 对照组:503 + retryDelay < 7s + 无 SingleAccountRetry → 智能重试耗尽后照常设限流
+func TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit(t *testing.T) {
+ failRespBody := `{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+ ]
+ }
+ }`
+ failResp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(failRespBody)),
+ }
+ upstream := &mockSmartRetryUpstream{
+ responses: []*http.Response{failResp},
+ errors: []error{nil},
+ }
+
+ repo := &stubAntigravityAccountRepo{}
+ account := &Account{
+ ID: 5,
+ Name: "acc-multi-503",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ }
+
+ respBody := []byte(`{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+ ]
+ }
+ }`)
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(bytes.NewReader(respBody)),
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: context.Background(), // 无单账号标记
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ accountRepo: repo,
+ handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+ return nil
+ },
+ }
+
+ availableURLs := []string{"https://ag-1.test"}
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ // 对照:多账号模式应返回 switchError
+ require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503")
+ // 对照:多账号模式应设模型限流
+ require.Len(t, repo.modelRateLimitCalls, 1,
+ "multi-account mode should set model rate limit")
+}
+
+// ---------------------------------------------------------------------------
+// 5. handleSingleAccountRetryInPlace 直接测试
+// ---------------------------------------------------------------------------
+
+// TestHandleSingleAccountRetryInPlace_Success 原地重试成功
+func TestHandleSingleAccountRetryInPlace_Success(t *testing.T) {
+ successResp := &http.Response{
+ StatusCode: http.StatusOK,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+ }
+ upstream := &mockSmartRetryUpstream{
+ responses: []*http.Response{successResp},
+ errors: []error{nil},
+ }
+
+ account := &Account{
+ ID: 10,
+ Name: "acc-inplace-ok",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Concurrency: 1,
+ }
+
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(),
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ }
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ require.NotNil(t, result.resp, "should return successful response")
+ require.Equal(t, http.StatusOK, result.resp.StatusCode)
+ require.Nil(t, result.switchError, "should not switch account on success")
+ require.Nil(t, result.err)
+}
+
+// TestHandleSingleAccountRetryInPlace_AllRetriesFail 所有重试都失败,返回 503(不设限流)
+func TestHandleSingleAccountRetryInPlace_AllRetriesFail(t *testing.T) {
+ // 构造 3 个 503 响应(对应 3 次原地重试)
+ var responses []*http.Response
+ var errors []error
+ for i := 0; i < antigravitySingleAccountSmartRetryMaxAttempts; i++ {
+ responses = append(responses, &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(`{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+ ]
+ }
+ }`)),
+ })
+ errors = append(errors, nil)
+ }
+ upstream := &mockSmartRetryUpstream{
+ responses: responses,
+ errors: errors,
+ }
+
+ account := &Account{
+ ID: 11,
+ Name: "acc-inplace-fail",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Concurrency: 1,
+ }
+
+ origBody := []byte(`{"error":{"code":503,"status":"UNAVAILABLE"}}`)
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{"X-Test": {"original"}},
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(),
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ }
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSingleAccountRetryInPlace(params, resp, origBody, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ // 关键:返回 503 resp,不返回 switchError
+ require.NotNil(t, result.resp, "should return 503 response directly")
+ require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode)
+ require.Nil(t, result.switchError, "should NOT return switchError - let Handler handle it")
+ require.Nil(t, result.err)
+
+ // 验证确实重试了指定次数
+ require.Len(t, upstream.calls, antigravitySingleAccountSmartRetryMaxAttempts,
+ "should have made exactly maxAttempts retry calls")
+}
+
+// TestHandleSingleAccountRetryInPlace_WaitDurationClamped 等待时间被限制在 [min, max] 范围
+func TestHandleSingleAccountRetryInPlace_WaitDurationClamped(t *testing.T) {
+ // 用短延迟的成功响应,只验证不 panic
+ successResp := &http.Response{
+ StatusCode: http.StatusOK,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+ }
+ upstream := &mockSmartRetryUpstream{
+ responses: []*http.Response{successResp},
+ errors: []error{nil},
+ }
+
+ account := &Account{
+ ID: 12,
+ Name: "acc-clamp",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Concurrency: 1,
+ }
+
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(),
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ }
+
+ svc := &AntigravityGatewayService{}
+
+ // 等待时间过大应被 clamp 到 antigravitySingleAccountSmartRetryMaxWait
+ result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 999*time.Second, "gemini-3-pro")
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ require.NotNil(t, result.resp)
+ require.Equal(t, http.StatusOK, result.resp.StatusCode)
+}
+
+// TestHandleSingleAccountRetryInPlace_ContextCanceled context 取消时立即返回
+func TestHandleSingleAccountRetryInPlace_ContextCanceled(t *testing.T) {
+ upstream := &mockSmartRetryUpstream{
+ responses: []*http.Response{nil},
+ errors: []error{nil},
+ }
+
+ account := &Account{
+ ID: 13,
+ Name: "acc-cancel",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Concurrency: 1,
+ }
+
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ }
+
+ ctx, cancel := context.WithCancel(context.Background())
+ ctx = context.WithValue(ctx, ctxkey.SingleAccountRetry, true)
+ cancel() // 立即取消
+
+ params := antigravityRetryLoopParams{
+ ctx: ctx,
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ }
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ require.Error(t, result.err, "should return context error")
+ // 不应调用 upstream(因为在等待阶段就被取消了)
+ require.Len(t, upstream.calls, 0, "should not call upstream when context is canceled")
+}
+
+// TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry 网络错误时继续重试
+func TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry(t *testing.T) {
+ successResp := &http.Response{
+ StatusCode: http.StatusOK,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+ }
+ upstream := &mockSmartRetryUpstream{
+ // 第1次网络错误(nil resp),第2次成功
+ responses: []*http.Response{nil, successResp},
+ errors: []error{nil, nil},
+ }
+
+ account := &Account{
+ ID: 14,
+ Name: "acc-net-retry",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Concurrency: 1,
+ }
+
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(),
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ }
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ require.NotNil(t, result.resp, "should return successful response after network error recovery")
+ require.Equal(t, http.StatusOK, result.resp.StatusCode)
+ require.Len(t, upstream.calls, 2, "first call fails (network error), second succeeds")
+}
+
+// ---------------------------------------------------------------------------
+// 6. antigravityRetryLoop 预检查:单账号模式跳过限流
+// ---------------------------------------------------------------------------
+
+// TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit
+// 预检查中,如果有 SingleAccountRetry 标记,即使账号已限流也跳过直接发请求
+func TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit(t *testing.T) {
+ // 创建一个已设模型限流的账号
+ upstream := &recordingOKUpstream{}
+ account := &Account{
+ ID: 20,
+ Name: "acc-rate-limited",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Schedulable: true,
+ Status: StatusActive,
+ Concurrency: 1,
+ Extra: map[string]any{
+ modelRateLimitsKey: map[string]any{
+ "claude-sonnet-4-5": map[string]any{
+ "rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339),
+ },
+ },
+ },
+ }
+
+ svc := &AntigravityGatewayService{}
+ result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(),
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ requestedModel: "claude-sonnet-4-5",
+ handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+ return nil
+ },
+ })
+
+ require.NoError(t, err, "should not return error")
+ require.NotNil(t, result, "should return result")
+ require.NotNil(t, result.resp, "should have response")
+ require.Equal(t, http.StatusOK, result.resp.StatusCode)
+ // 关键:尽管限流了,有 SingleAccountRetry 标记时仍然到达了 upstream
+ require.Equal(t, 1, upstream.calls, "should have reached upstream despite rate limit")
+}
+
+// TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit
+// 对照组:无 SingleAccountRetry + 已限流 → 预检查返回 switchError
+func TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit(t *testing.T) {
+ upstream := &recordingOKUpstream{}
+ account := &Account{
+ ID: 21,
+ Name: "acc-rate-limited-multi",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Schedulable: true,
+ Status: StatusActive,
+ Concurrency: 1,
+ Extra: map[string]any{
+ modelRateLimitsKey: map[string]any{
+ "claude-sonnet-4-5": map[string]any{
+ "rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339),
+ },
+ },
+ },
+ }
+
+ svc := &AntigravityGatewayService{}
+ result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+ ctx: context.Background(), // 无单账号标记
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ requestedModel: "claude-sonnet-4-5",
+ handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+ return nil
+ },
+ })
+
+ require.Nil(t, result, "should not return result on rate limit switch")
+ require.NotNil(t, err, "should return error")
+
+ var switchErr *AntigravityAccountSwitchError
+ require.ErrorAs(t, err, &switchErr, "should return AntigravityAccountSwitchError")
+ require.Equal(t, account.ID, switchErr.OriginalAccountID)
+ require.Equal(t, "claude-sonnet-4-5", switchErr.RateLimitedModel)
+
+ // upstream 不应被调用(预检查就短路了)
+ require.Equal(t, 0, upstream.calls, "upstream should NOT be called when pre-check blocks")
+}
+
+// ---------------------------------------------------------------------------
+// 7. 端到端集成场景测试
+// ---------------------------------------------------------------------------
+
+// TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E
+// 端到端场景:503 + 单账号 + 原地重试第2次成功
+func TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E(t *testing.T) {
+ // 第1次原地重试仍返回 503,第2次成功
+ fail503Body := `{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+ ]
+ }
+ }`
+ resp503 := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(fail503Body)),
+ }
+ successResp := &http.Response{
+ StatusCode: http.StatusOK,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+ }
+
+ upstream := &mockSmartRetryUpstream{
+ responses: []*http.Response{resp503, successResp},
+ errors: []error{nil, nil},
+ }
+
+ account := &Account{
+ ID: 30,
+ Name: "acc-e2e",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Concurrency: 1,
+ }
+
+ resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ }
+
+ params := antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(),
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ }
+
+ svc := &AntigravityGatewayService{}
+ result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+ require.NotNil(t, result)
+ require.Equal(t, smartRetryActionBreakWithResp, result.action)
+ require.NotNil(t, result.resp, "should return successful response after 2nd attempt")
+ require.Equal(t, http.StatusOK, result.resp.StatusCode)
+ require.Nil(t, result.switchError)
+ require.Len(t, upstream.calls, 2, "first 503, second OK")
+}
+
+// TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E
+// 通过 antigravityRetryLoop → handleSmartRetry → handleSingleAccountRetryInPlace 完整链路
+func TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E(t *testing.T) {
+ // 初始请求返回 503 + 长延迟
+ initial503Body := []byte(`{
+ "error": {
+ "code": 503,
+ "status": "UNAVAILABLE",
+ "details": [
+ {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+ {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "10s"}
+ ],
+ "message": "No capacity available"
+ }
+ }`)
+ initial503Resp := &http.Response{
+ StatusCode: http.StatusServiceUnavailable,
+ Header: http.Header{},
+ Body: io.NopCloser(bytes.NewReader(initial503Body)),
+ }
+
+ // 原地重试成功
+ successResp := &http.Response{
+ StatusCode: http.StatusOK,
+ Header: http.Header{},
+ Body: io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+ }
+
+ upstream := &mockSmartRetryUpstream{
+ // 第1次调用(retryLoop 主循环)返回 503
+ // 第2次调用(handleSingleAccountRetryInPlace 原地重试)返回 200
+ responses: []*http.Response{initial503Resp, successResp},
+ errors: []error{nil, nil},
+ }
+
+ repo := &stubAntigravityAccountRepo{}
+ account := &Account{
+ ID: 31,
+ Name: "acc-e2e-loop",
+ Type: AccountTypeOAuth,
+ Platform: PlatformAntigravity,
+ Schedulable: true,
+ Status: StatusActive,
+ Concurrency: 1,
+ }
+
+ svc := &AntigravityGatewayService{}
+ result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+ ctx: ctxWithSingleAccountRetry(),
+ prefix: "[test]",
+ account: account,
+ accessToken: "token",
+ action: "generateContent",
+ body: []byte(`{"input":"test"}`),
+ httpUpstream: upstream,
+ accountRepo: repo,
+ handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+ return nil
+ },
+ })
+
+ require.NoError(t, err, "should not return error on successful retry")
+ require.NotNil(t, result, "should return result")
+ require.NotNil(t, result.resp, "should return response")
+ require.Equal(t, http.StatusOK, result.resp.StatusCode)
+
+ // 验证未设模型限流
+ require.Len(t, repo.modelRateLimitCalls, 0,
+ "should NOT set model rate limit in single account retry mode")
+}
diff --git a/backend/internal/service/error_policy_integration_test.go b/backend/internal/service/error_policy_integration_test.go
index 9f8ad938..a8b42a2c 100644
--- a/backend/internal/service/error_policy_integration_test.go
+++ b/backend/internal/service/error_policy_integration_test.go
@@ -116,7 +116,7 @@ func TestRetryLoop_ErrorPolicy_CustomErrorCodes(t *testing.T) {
customCodes: []any{float64(500)},
expectHandleError: 0,
expectUpstream: 1,
- expectStatusCode: 429,
+ expectStatusCode: 500,
},
{
name: "500_in_custom_codes_matched",
@@ -364,3 +364,109 @@ func TestRetryLoop_ErrorPolicy_NoPolicy_OriginalBehavior(t *testing.T) {
require.Equal(t, antigravityMaxRetries, upstream.calls, "should exhaust all retries")
require.Equal(t, 1, handleErrorCount, "handleError should be called once after retries exhausted")
}
+
+// ---------------------------------------------------------------------------
+// epTrackingRepo — records SetRateLimited / SetError calls for verification.
+// ---------------------------------------------------------------------------
+
+type epTrackingRepo struct {
+ mockAccountRepoForGemini
+ rateLimitedCalls int
+ rateLimitedID int64
+ setErrCalls int
+ setErrID int64
+ tempCalls int
+}
+
+func (r *epTrackingRepo) SetRateLimited(_ context.Context, id int64, _ time.Time) error {
+ r.rateLimitedCalls++
+ r.rateLimitedID = id
+ return nil
+}
+
+func (r *epTrackingRepo) SetError(_ context.Context, id int64, _ string) error {
+ r.setErrCalls++
+ r.setErrID = id
+ return nil
+}
+
+func (r *epTrackingRepo) SetTempUnschedulable(_ context.Context, _ int64, _ time.Time, _ string) error {
+ r.tempCalls++
+ return nil
+}
+
+// ---------------------------------------------------------------------------
+// TestCustomErrorCode599_SkippedErrors_Return500_NoRateLimit
+//
+// 核心场景:自定义错误码设为 [599](一个不会真正出现的错误码),
+// 当上游返回 429/500/503/401 时:
+// - 返回给客户端的状态码必须是 500(而不是透传原始状态码)
+// - 不调用 SetRateLimited(不进入限流状态)
+// - 不调用 SetError(不停止调度)
+// - 不调用 handleError
+// ---------------------------------------------------------------------------
+
+func TestCustomErrorCode599_SkippedErrors_Return500_NoRateLimit(t *testing.T) {
+ errorCodes := []int{429, 500, 503, 401, 403}
+
+ for _, upstreamStatus := range errorCodes {
+ t.Run(http.StatusText(upstreamStatus), func(t *testing.T) {
+ saveAndSetBaseURLs(t)
+
+ upstream := &epFixedUpstream{
+ statusCode: upstreamStatus,
+ body: `{"error":"some upstream error"}`,
+ }
+ repo := &epTrackingRepo{}
+ rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+ svc := &AntigravityGatewayService{rateLimitService: rlSvc}
+
+ account := &Account{
+ ID: 500,
+ Type: AccountTypeAPIKey,
+ Platform: PlatformAntigravity,
+ Schedulable: true,
+ Status: StatusActive,
+ Concurrency: 1,
+ Credentials: map[string]any{
+ "custom_error_codes_enabled": true,
+ "custom_error_codes": []any{float64(599)},
+ },
+ }
+
+ var handleErrorCount int
+ p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
+ handleErrorCount++
+ return nil
+ })
+
+ result, err := svc.antigravityRetryLoop(p)
+
+ // 不应返回 error(Skipped 不触发账号切换)
+ require.NoError(t, err, "should not return error")
+ require.NotNil(t, result, "result should not be nil")
+ require.NotNil(t, result.resp, "response should not be nil")
+ defer func() { _ = result.resp.Body.Close() }()
+
+ // 状态码必须是 500(不透传原始状态码)
+ require.Equal(t, http.StatusInternalServerError, result.resp.StatusCode,
+ "skipped error should return 500, not %d", upstreamStatus)
+
+ // 不调用 handleError
+ require.Equal(t, 0, handleErrorCount,
+ "handleError should NOT be called for skipped errors")
+
+ // 不标记限流
+ require.Equal(t, 0, repo.rateLimitedCalls,
+ "SetRateLimited should NOT be called for skipped errors")
+
+ // 不停止调度
+ require.Equal(t, 0, repo.setErrCalls,
+ "SetError should NOT be called for skipped errors")
+
+ // 只调用一次上游(不重试)
+ require.Equal(t, 1, upstream.calls,
+ "should call upstream exactly once (no retry)")
+ })
+ }
+}
diff --git a/backend/internal/service/error_policy_test.go b/backend/internal/service/error_policy_test.go
index a8b69c22..9d7d025e 100644
--- a/backend/internal/service/error_policy_test.go
+++ b/backend/internal/service/error_policy_test.go
@@ -158,6 +158,7 @@ func TestApplyErrorPolicy(t *testing.T) {
statusCode int
body []byte
expectedHandled bool
+ expectedStatus int // expected outStatus
expectedSwitchErr bool // expect *AntigravityAccountSwitchError
handleErrorCalls int
}{
@@ -171,6 +172,7 @@ func TestApplyErrorPolicy(t *testing.T) {
statusCode: 500,
body: []byte(`"error"`),
expectedHandled: false,
+ expectedStatus: 500, // passthrough
handleErrorCalls: 0,
},
{
@@ -187,6 +189,7 @@ func TestApplyErrorPolicy(t *testing.T) {
statusCode: 500, // not in custom codes
body: []byte(`"error"`),
expectedHandled: true,
+ expectedStatus: http.StatusInternalServerError, // skipped → 500
handleErrorCalls: 0,
},
{
@@ -203,6 +206,7 @@ func TestApplyErrorPolicy(t *testing.T) {
statusCode: 500,
body: []byte(`"error"`),
expectedHandled: true,
+ expectedStatus: 500, // matched → original status
handleErrorCalls: 1,
},
{
@@ -225,6 +229,7 @@ func TestApplyErrorPolicy(t *testing.T) {
statusCode: 503,
body: []byte(`overloaded`),
expectedHandled: true,
+ expectedStatus: 503, // temp_unscheduled → original status
expectedSwitchErr: true,
handleErrorCalls: 0,
},
@@ -250,9 +255,10 @@ func TestApplyErrorPolicy(t *testing.T) {
isStickySession: true,
}
- handled, retErr := svc.applyErrorPolicy(p, tt.statusCode, http.Header{}, tt.body)
+ handled, outStatus, retErr := svc.applyErrorPolicy(p, tt.statusCode, http.Header{}, tt.body)
require.Equal(t, tt.expectedHandled, handled, "handled mismatch")
+ require.Equal(t, tt.expectedStatus, outStatus, "outStatus mismatch")
require.Equal(t, tt.handleErrorCalls, handleErrorCount, "handleError call count mismatch")
if tt.expectedSwitchErr {
diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index 2e1b0ba4..fb2dd1a8 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -243,6 +243,12 @@ var (
}
)
+// systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表
+// OAuth/SetupToken 账号转发时,匹配这些前缀的 system 元素会被移除
+var systemBlockFilterPrefixes = []string{
+ "x-anthropic-billing-header",
+}
+
// ErrClaudeCodeOnly 表示分组仅允许 Claude Code 客户端访问
var ErrClaudeCodeOnly = errors.New("this group only allows Claude Code clients")
@@ -1687,6 +1693,17 @@ func (s *GatewayService) listSchedulableAccounts(ctx context.Context, groupID *i
return accounts, useMixed, nil
}
+// IsSingleAntigravityAccountGroup 检查指定分组是否只有一个 antigravity 平台的可调度账号。
+// 用于 Handler 层在首次请求时提前设置 SingleAccountRetry context,
+// 避免单账号分组收到 503 时错误地设置模型限流标记导致后续请求连续快速失败。
+func (s *GatewayService) IsSingleAntigravityAccountGroup(ctx context.Context, groupID *int64) bool {
+ accounts, _, err := s.listSchedulableAccounts(ctx, groupID, PlatformAntigravity, true)
+ if err != nil {
+ return false
+ }
+ return len(accounts) == 1
+}
+
func (s *GatewayService) isAccountAllowedForPlatform(account *Account, platform string, useMixed bool) bool {
if account == nil {
return false
@@ -2702,6 +2719,60 @@ func hasClaudeCodePrefix(text string) bool {
return false
}
+// matchesFilterPrefix 检查文本是否匹配任一过滤前缀
+func matchesFilterPrefix(text string) bool {
+ for _, prefix := range systemBlockFilterPrefixes {
+ if strings.HasPrefix(text, prefix) {
+ return true
+ }
+ }
+ return false
+}
+
+// filterSystemBlocksByPrefix 从 body 的 system 中移除文本匹配 systemBlockFilterPrefixes 前缀的元素
+// 直接从 body 解析 system,不依赖外部传入的 parsed.System(因为前置步骤可能已修改 body 中的 system)
+func filterSystemBlocksByPrefix(body []byte) []byte {
+ sys := gjson.GetBytes(body, "system")
+ if !sys.Exists() {
+ return body
+ }
+
+ switch {
+ case sys.Type == gjson.String:
+ if matchesFilterPrefix(sys.Str) {
+ result, err := sjson.DeleteBytes(body, "system")
+ if err != nil {
+ return body
+ }
+ return result
+ }
+ case sys.IsArray():
+ var parsed []any
+ if err := json.Unmarshal([]byte(sys.Raw), &parsed); err != nil {
+ return body
+ }
+ filtered := make([]any, 0, len(parsed))
+ changed := false
+ for _, item := range parsed {
+ if m, ok := item.(map[string]any); ok {
+ if text, ok := m["text"].(string); ok && matchesFilterPrefix(text) {
+ changed = true
+ continue
+ }
+ }
+ filtered = append(filtered, item)
+ }
+ if changed {
+ result, err := sjson.SetBytes(body, "system", filtered)
+ if err != nil {
+ return body
+ }
+ return result
+ }
+ }
+ return body
+}
+
// injectClaudeCodePrompt 在 system 开头注入 Claude Code 提示词
// 处理 null、字符串、数组三种格式
func injectClaudeCodePrompt(body []byte, system any) []byte {
@@ -2981,6 +3052,12 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
body, reqModel = normalizeClaudeOAuthRequestBody(body, reqModel, normalizeOpts)
}
+ // OAuth/SetupToken 账号:移除黑名单前缀匹配的 system 元素(如客户端注入的计费元数据)
+ // 放在 inject/normalize 之后,确保不会被覆盖
+ if account.IsOAuth() {
+ body = filterSystemBlocksByPrefix(body)
+ }
+
// 强制执行 cache_control 块数量限制(最多 4 个)
body = enforceCacheControlLimit(body)
diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go
index d9068a23..d2b0ac0d 100644
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -771,6 +771,14 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
break
}
+ // 错误策略优先:匹配则跳过重试直接处理。
+ if matched, rebuilt := s.checkErrorPolicyInLoop(ctx, account, resp); matched {
+ resp = rebuilt
+ break
+ } else {
+ resp = rebuilt
+ }
+
if resp.StatusCode >= 400 && s.shouldRetryGeminiUpstreamError(account, resp.StatusCode) {
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
_ = resp.Body.Close()
@@ -840,7 +848,7 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
if upstreamReqID == "" {
upstreamReqID = resp.Header.Get("x-goog-request-id")
}
- return nil, s.writeGeminiMappedError(c, account, resp.StatusCode, upstreamReqID, respBody)
+ return nil, s.writeGeminiMappedError(c, account, http.StatusInternalServerError, upstreamReqID, respBody)
case ErrorPolicyMatched, ErrorPolicyTempUnscheduled:
s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
upstreamReqID := resp.Header.Get(requestIDHeader)
@@ -1178,6 +1186,14 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
return nil, s.writeGoogleError(c, http.StatusBadGateway, "Upstream request failed after retries: "+safeErr)
}
+ // 错误策略优先:匹配则跳过重试直接处理。
+ if matched, rebuilt := s.checkErrorPolicyInLoop(ctx, account, resp); matched {
+ resp = rebuilt
+ break
+ } else {
+ resp = rebuilt
+ }
+
if resp.StatusCode >= 400 && s.shouldRetryGeminiUpstreamError(account, resp.StatusCode) {
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
_ = resp.Body.Close()
@@ -1285,7 +1301,7 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
if contentType == "" {
contentType = "application/json"
}
- c.Data(resp.StatusCode, contentType, respBody)
+ c.Data(http.StatusInternalServerError, contentType, respBody)
return nil, fmt.Errorf("gemini upstream error: %d (skipped by error policy)", resp.StatusCode)
case ErrorPolicyMatched, ErrorPolicyTempUnscheduled:
s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
@@ -1427,6 +1443,26 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
}, nil
}
+// checkErrorPolicyInLoop 在重试循环内预检查错误策略。
+// 返回 true 表示策略已匹配(调用者应 break),resp 已重建可直接使用。
+// 返回 false 表示 ErrorPolicyNone,resp 已重建,调用者继续走重试逻辑。
+func (s *GeminiMessagesCompatService) checkErrorPolicyInLoop(
+ ctx context.Context, account *Account, resp *http.Response,
+) (matched bool, rebuilt *http.Response) {
+ if resp.StatusCode < 400 || s.rateLimitService == nil {
+ return false, resp
+ }
+ body, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
+ _ = resp.Body.Close()
+ rebuilt = &http.Response{
+ StatusCode: resp.StatusCode,
+ Header: resp.Header.Clone(),
+ Body: io.NopCloser(bytes.NewReader(body)),
+ }
+ policy := s.rateLimitService.CheckErrorPolicy(ctx, account, resp.StatusCode, body)
+ return policy != ErrorPolicyNone, rebuilt
+}
+
func (s *GeminiMessagesCompatService) shouldRetryGeminiUpstreamError(account *Account, statusCode int) bool {
switch statusCode {
case 429, 500, 502, 503, 504, 529:
@@ -2576,6 +2612,10 @@ func asInt(v any) (int, bool) {
}
func (s *GeminiMessagesCompatService) handleGeminiUpstreamError(ctx context.Context, account *Account, statusCode int, headers http.Header, body []byte) {
+ // 遵守自定义错误码策略:未命中则跳过所有限流处理
+ if !account.ShouldHandleErrorCode(statusCode) {
+ return
+ }
if s.rateLimitService != nil && (statusCode == 401 || statusCode == 403 || statusCode == 529) {
s.rateLimitService.HandleUpstreamError(ctx, account, statusCode, headers, body)
return
diff --git a/backend/internal/service/ratelimit_service.go b/backend/internal/service/ratelimit_service.go
index 63732dee..12c48ab8 100644
--- a/backend/internal/service/ratelimit_service.go
+++ b/backend/internal/service/ratelimit_service.go
@@ -623,6 +623,10 @@ func (s *RateLimitService) ClearTempUnschedulable(ctx context.Context, accountID
slog.Warn("temp_unsched_cache_delete_failed", "account_id", accountID, "error", err)
}
}
+ // 同时清除模型级别限流
+ if err := s.accountRepo.ClearModelRateLimits(ctx, accountID); err != nil {
+ slog.Warn("clear_model_rate_limits_on_temp_unsched_reset_failed", "account_id", accountID, "error", err)
+ }
return nil
}
diff --git a/frontend/package.json b/frontend/package.json
index 325eba60..1b380b17 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -17,7 +17,7 @@
"dependencies": {
"@lobehub/icons": "^4.0.2",
"@vueuse/core": "^10.7.0",
- "axios": "^1.6.2",
+ "axios": "^1.13.5",
"chart.js": "^4.4.1",
"dompurify": "^3.3.1",
"driver.js": "^1.4.0",
diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml
index 9af2d7af..37c384b4 100644
--- a/frontend/pnpm-lock.yaml
+++ b/frontend/pnpm-lock.yaml
@@ -15,8 +15,8 @@ importers:
specifier: ^10.7.0
version: 10.11.1(vue@3.5.26(typescript@5.6.3))
axios:
- specifier: ^1.6.2
- version: 1.13.2
+ specifier: ^1.13.5
+ version: 1.13.5
chart.js:
specifier: ^4.4.1
version: 4.5.1
@@ -1257,56 +1257,67 @@ packages:
resolution: {integrity: sha512-EHMUcDwhtdRGlXZsGSIuXSYwD5kOT9NVnx9sqzYiwAc91wfYOE1g1djOEDseZJKKqtHAHGwnGPQu3kytmfaXLQ==}
cpu: [arm]
os: [linux]
+ libc: [glibc]
'@rollup/rollup-linux-arm-musleabihf@4.54.0':
resolution: {integrity: sha512-+pBrqEjaakN2ySv5RVrj/qLytYhPKEUwk+e3SFU5jTLHIcAtqh2rLrd/OkbNuHJpsBgxsD8ccJt5ga/SeG0JmA==}
cpu: [arm]
os: [linux]
+ libc: [musl]
'@rollup/rollup-linux-arm64-gnu@4.54.0':
resolution: {integrity: sha512-NSqc7rE9wuUaRBsBp5ckQ5CVz5aIRKCwsoa6WMF7G01sX3/qHUw/z4pv+D+ahL1EIKy6Enpcnz1RY8pf7bjwng==}
cpu: [arm64]
os: [linux]
+ libc: [glibc]
'@rollup/rollup-linux-arm64-musl@4.54.0':
resolution: {integrity: sha512-gr5vDbg3Bakga5kbdpqx81m2n9IX8M6gIMlQQIXiLTNeQW6CucvuInJ91EuCJ/JYvc+rcLLsDFcfAD1K7fMofg==}
cpu: [arm64]
os: [linux]
+ libc: [musl]
'@rollup/rollup-linux-loong64-gnu@4.54.0':
resolution: {integrity: sha512-gsrtB1NA3ZYj2vq0Rzkylo9ylCtW/PhpLEivlgWe0bpgtX5+9j9EZa0wtZiCjgu6zmSeZWyI/e2YRX1URozpIw==}
cpu: [loong64]
os: [linux]
+ libc: [glibc]
'@rollup/rollup-linux-ppc64-gnu@4.54.0':
resolution: {integrity: sha512-y3qNOfTBStmFNq+t4s7Tmc9hW2ENtPg8FeUD/VShI7rKxNW7O4fFeaYbMsd3tpFlIg1Q8IapFgy7Q9i2BqeBvA==}
cpu: [ppc64]
os: [linux]
+ libc: [glibc]
'@rollup/rollup-linux-riscv64-gnu@4.54.0':
resolution: {integrity: sha512-89sepv7h2lIVPsFma8iwmccN7Yjjtgz0Rj/Ou6fEqg3HDhpCa+Et+YSufy27i6b0Wav69Qv4WBNl3Rs6pwhebQ==}
cpu: [riscv64]
os: [linux]
+ libc: [glibc]
'@rollup/rollup-linux-riscv64-musl@4.54.0':
resolution: {integrity: sha512-ZcU77ieh0M2Q8Ur7D5X7KvK+UxbXeDHwiOt/CPSBTI1fBmeDMivW0dPkdqkT4rOgDjrDDBUed9x4EgraIKoR2A==}
cpu: [riscv64]
os: [linux]
+ libc: [musl]
'@rollup/rollup-linux-s390x-gnu@4.54.0':
resolution: {integrity: sha512-2AdWy5RdDF5+4YfG/YesGDDtbyJlC9LHmL6rZw6FurBJ5n4vFGupsOBGfwMRjBYH7qRQowT8D/U4LoSvVwOhSQ==}
cpu: [s390x]
os: [linux]
+ libc: [glibc]
'@rollup/rollup-linux-x64-gnu@4.54.0':
resolution: {integrity: sha512-WGt5J8Ij/rvyqpFexxk3ffKqqbLf9AqrTBbWDk7ApGUzaIs6V+s2s84kAxklFwmMF/vBNGrVdYgbblCOFFezMQ==}
cpu: [x64]
os: [linux]
+ libc: [glibc]
'@rollup/rollup-linux-x64-musl@4.54.0':
resolution: {integrity: sha512-JzQmb38ATzHjxlPHuTH6tE7ojnMKM2kYNzt44LO/jJi8BpceEC8QuXYA908n8r3CNuG/B3BV8VR3Hi1rYtmPiw==}
cpu: [x64]
os: [linux]
+ libc: [musl]
'@rollup/rollup-openharmony-arm64@4.54.0':
resolution: {integrity: sha512-huT3fd0iC7jigGh7n3q/+lfPcXxBi+om/Rs3yiFxjvSxbSB6aohDFXbWvlspaqjeOh+hx7DDHS+5Es5qRkWkZg==}
@@ -1805,8 +1816,8 @@ packages:
peerDependencies:
postcss: ^8.1.0
- axios@1.13.2:
- resolution: {integrity: sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==}
+ axios@1.13.5:
+ resolution: {integrity: sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==}
babel-plugin-macros@3.1.0:
resolution: {integrity: sha512-Cg7TFGpIr01vOQNODXOOaGz2NpCU5gl8x1qJFbb6hbZxR7XrcE2vtbAsTAbJ7/xwJtUuJEw8K8Zr/AE0LHlesg==}
@@ -6387,7 +6398,7 @@ snapshots:
postcss: 8.5.6
postcss-value-parser: 4.2.0
- axios@1.13.2:
+ axios@1.13.5:
dependencies:
follow-redirects: 1.15.11
form-data: 4.0.5
diff --git a/frontend/src/components/account/CreateAccountModal.vue b/frontend/src/components/account/CreateAccountModal.vue
index 0e294ee6..a7290cbf 100644
--- a/frontend/src/components/account/CreateAccountModal.vue
+++ b/frontend/src/components/account/CreateAccountModal.vue
@@ -665,8 +665,8 @@
{{ t('admin.accounts.upstream.baseUrlHint') }}
@@ -816,8 +816,8 @@ - -{{ baseUrlHint }}
@@ -55,14 +57,16 @@ ? 'sk-proj-...' : account.platform === 'gemini' ? 'AIza...' - : 'sk-ant-...' + : account.platform === 'antigravity' + ? 'sk-...' + : 'sk-ant-...' " />{{ t('admin.accounts.leaveEmptyToKeep') }}
{{ t('admin.accounts.upstream.baseUrlHint') }}