Merge branch 'main' into test-sora

2026-02-10 18:01:17 +08:00
parent e489996713 1dd3158c7e
commit 3b0910f664
19 changed files with 1541 additions and 43 deletions
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -240,6 +240,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 		var lastFailoverErr *service.UpstreamFailoverError
 		var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
 		// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
 		// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
 		if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) {
 			ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
 			c.Request = c.Request.WithContext(ctx)
 		}
 		for {
 			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制
 			if err != nil {
@@ -248,6 +255,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
 					return
 				}
 				// Antigravity 单账号退避重试：分组内没有其他可用账号时，
 				// 对 503 错误不直接返回，而是清除排除列表、等待退避后重试同一个账号。
 				// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的，等几秒就能恢复。
 				if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
 					if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
 						log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
 						failedAccountIDs = make(map[int64]struct{})
 						// 设置 context 标记，让 Service 层预检查等待限流过期而非直接切换
 						ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
 						c.Request = c.Request.WithContext(ctx)
 						continue
 					}
 				}
 				if lastFailoverErr != nil {
 					h.handleFailoverExhausted(c, lastFailoverErr, service.PlatformGemini, streamStarted)
 				} else {
@@ -397,6 +417,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	}
 	fallbackUsed := false
 	// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
 	// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
 	if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), currentAPIKey.GroupID) {
 		ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
 		c.Request = c.Request.WithContext(ctx)
 	}
 	for {
 		maxAccountSwitches := h.maxAccountSwitches
 		switchCount := 0
@@ -414,6 +441,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
 					return
 				}
 				// Antigravity 单账号退避重试：分组内没有其他可用账号时，
 				// 对 503 错误不直接返回，而是清除排除列表、等待退避后重试同一个账号。
 				// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的，等几秒就能恢复。
 				if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
 					if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
 						log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
 						failedAccountIDs = make(map[int64]struct{})
 						// 设置 context 标记，让 Service 层预检查等待限流过期而非直接切换
 						ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
 						c.Request = c.Request.WithContext(ctx)
 						continue
 					}
 				}
 				if lastFailoverErr != nil {
 					h.handleFailoverExhausted(c, lastFailoverErr, platform, streamStarted)
 				} else {
@@ -851,6 +891,27 @@ func sleepFailoverDelay(ctx context.Context, switchCount int) bool {
 	}
 }
 // sleepAntigravitySingleAccountBackoff Antigravity 平台单账号分组的 503 退避重试延时。
 // 当分组内只有一个可用账号且上游返回 503（MODEL_CAPACITY_EXHAUSTED）时使用，
 // 采用短固定延时策略。Service 层在 SingleAccountRetry 模式下已经做了充分的原地重试
 // （最多 3 次、总等待 30s），所以 Handler 层的退避只需短暂等待即可。
 // 返回 false 表示 context 已取消。
 func sleepAntigravitySingleAccountBackoff(ctx context.Context, retryCount int) bool {
 	// 固定短延时：2s
 	// Service 层已经在原地等待了足够长的时间（retryDelay × 重试次数），
 	// Handler 层只需短暂间隔后重新进入 Service 层即可。
 	const delay = 2 * time.Second
 	log.Printf("Antigravity single-account 503 backoff: waiting %v before retry (attempt %d)", delay, retryCount)
 	select {
 	case <-ctx.Done():
 		return false
 	case <-time.After(delay):
 		return true
 	}
 }
 func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) {
 	statusCode := failoverErr.StatusCode
 	responseBody := failoverErr.ResponseBody
--- a/backend/internal/handler/gateway_handler_single_account_retry_test.go
+++ b/backend/internal/handler/gateway_handler_single_account_retry_test.go
@@ -0,0 +1,51 @@
 package handler
 import (
 	"context"
 	"testing"
 	"time"
 	"github.com/stretchr/testify/require"
 )
 // ---------------------------------------------------------------------------
 // sleepAntigravitySingleAccountBackoff 测试
 // ---------------------------------------------------------------------------
 func TestSleepAntigravitySingleAccountBackoff_ReturnsTrue(t *testing.T) {
 	ctx := context.Background()
 	start := time.Now()
 	ok := sleepAntigravitySingleAccountBackoff(ctx, 1)
 	elapsed := time.Since(start)
 	require.True(t, ok, "should return true when context is not canceled")
 	// 固定延迟 2s
 	require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond, "should wait approximately 2s")
 	require.Less(t, elapsed, 5*time.Second, "should not wait too long")
 }
 func TestSleepAntigravitySingleAccountBackoff_ContextCanceled(t *testing.T) {
 	ctx, cancel := context.WithCancel(context.Background())
 	cancel() // 立即取消
 	start := time.Now()
 	ok := sleepAntigravitySingleAccountBackoff(ctx, 1)
 	elapsed := time.Since(start)
 	require.False(t, ok, "should return false when context is canceled")
 	require.Less(t, elapsed, 500*time.Millisecond, "should return immediately on cancel")
 }
 func TestSleepAntigravitySingleAccountBackoff_FixedDelay(t *testing.T) {
 	// 验证不同 retryCount 都使用固定 2s 延迟
 	ctx := context.Background()
 	start := time.Now()
 	ok := sleepAntigravitySingleAccountBackoff(ctx, 5)
 	elapsed := time.Since(start)
 	require.True(t, ok)
 	// 即使 retryCount=5，延迟仍然是固定的 2s
 	require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond)
 	require.Less(t, elapsed, 5*time.Second)
 }
--- a/backend/internal/handler/gemini_v1beta_handler.go
+++ b/backend/internal/handler/gemini_v1beta_handler.go
@@ -327,6 +327,13 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 	var lastFailoverErr *service.UpstreamFailoverError
 	var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
 	// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
 	// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
 	if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) {
 		ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
 		c.Request = c.Request.WithContext(ctx)
 	}
 	for {
 		selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs, "") // Gemini 不使用会话限制
 		if err != nil {
@@ -334,6 +341,19 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 				googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
 				return
 			}
 			// Antigravity 单账号退避重试：分组内没有其他可用账号时，
 			// 对 503 错误不直接返回，而是清除排除列表、等待退避后重试同一个账号。
 			// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的，等几秒就能恢复。
 			if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
 				if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
 					log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
 					failedAccountIDs = make(map[int64]struct{})
 					// 设置 context 标记，让 Service 层预检查等待限流过期而非直接切换
 					ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
 					c.Request = c.Request.WithContext(ctx)
 					continue
 				}
 			}
 			h.handleGeminiFailoverExhausted(c, lastFailoverErr)
 			return
 		}
--- a/backend/internal/pkg/antigravity/request_transformer.go
+++ b/backend/internal/pkg/antigravity/request_transformer.go
@@ -271,6 +271,21 @@ func filterOpenCodePrompt(text string) string {
 	return ""
 }
 // systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表
 var systemBlockFilterPrefixes = []string{
 	"x-anthropic-billing-header",
 }
 // filterSystemBlockByPrefix 如果文本匹配过滤前缀，返回空字符串
 func filterSystemBlockByPrefix(text string) string {
 	for _, prefix := range systemBlockFilterPrefixes {
 		if strings.HasPrefix(text, prefix) {
 			return ""
 		}
 	}
 	return text
 }
 // buildSystemInstruction 构建 systemInstruction（与 Antigravity-Manager 保持一致）
 func buildSystemInstruction(system json.RawMessage, modelName string, opts TransformOptions, tools []ClaudeTool) *GeminiContent {
 	var parts []GeminiPart
@@ -287,8 +302,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans
 				if strings.Contains(sysStr, "You are Antigravity") {
 					userHasAntigravityIdentity = true
 				}
-				// 过滤 OpenCode 默认提示词
+				// 过滤 OpenCode 默认提示词和黑名单前缀
-				filtered := filterOpenCodePrompt(sysStr)
+				filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(sysStr))
 				if filtered != "" {
 					userSystemParts = append(userSystemParts, GeminiPart{Text: filtered})
 				}
@@ -302,8 +317,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans
 						if strings.Contains(block.Text, "You are Antigravity") {
 							userHasAntigravityIdentity = true
 						}
-						// 过滤 OpenCode 默认提示词
+						// 过滤 OpenCode 默认提示词和黑名单前缀
-						filtered := filterOpenCodePrompt(block.Text)
+						filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(block.Text))
 						if filtered != "" {
 							userSystemParts = append(userSystemParts, GeminiPart{Text: filtered})
 						}
--- a/backend/internal/pkg/ctxkey/ctxkey.go
+++ b/backend/internal/pkg/ctxkey/ctxkey.go
@@ -28,4 +28,8 @@ const (
 	// IsMaxTokensOneHaikuRequest 标识当前请求是否为 max_tokens=1 + haiku 模型的探测请求
 	// 用于 ClaudeCodeOnly 验证绕过（绕过 system prompt 检查，但仍需验证 User-Agent）
 	IsMaxTokensOneHaikuRequest Key = "ctx_is_max_tokens_one_haiku"
 	// SingleAccountRetry 标识当前请求处于单账号 503 退避重试模式。
 	// 在此模式下，Service 层的模型限流预检查将等待限流过期而非直接切换账号。
 	SingleAccountRetry Key = "ctx_single_account_retry"
 )
--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -20,6 +20,7 @@ import (
 	"time"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 	"github.com/tidwall/gjson"
@@ -47,6 +48,19 @@ const (
 	googleRPCTypeErrorInfo                = "type.googleapis.com/google.rpc.ErrorInfo"
 	googleRPCReasonModelCapacityExhausted = "MODEL_CAPACITY_EXHAUSTED"
 	googleRPCReasonRateLimitExceeded      = "RATE_LIMIT_EXCEEDED"
 	// 单账号 503 退避重试：Service 层原地重试的最大次数
 	// 在 handleSmartRetry 中，对于 shouldRateLimitModel（长延迟 ≥ 7s）的情况，
 	// 多账号模式下会设限流+切换账号；但单账号模式下改为原地等待+重试。
 	antigravitySingleAccountSmartRetryMaxAttempts = 3
 	// 单账号 503 退避重试：原地重试时单次最大等待时间
 	// 防止上游返回过长的 retryDelay 导致请求卡住太久
 	antigravitySingleAccountSmartRetryMaxWait = 15 * time.Second
 	// 单账号 503 退避重试：原地重试的总累计等待时间上限
 	// 超过此上限将不再重试，直接返回 503
 	antigravitySingleAccountSmartRetryTotalMaxWait = 30 * time.Second
 )
 // antigravityPassthroughErrorMessages 透传给客户端的错误消息白名单（小写）
@@ -149,6 +163,13 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 	// 情况1: retryDelay >= 阈值，限流模型并切换账号
 	if shouldRateLimitModel {
 		// 单账号 503 退避重试模式：不设限流、不切换账号，改为原地等待+重试
 		// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的，等几秒就能恢复。
 		// 多账号场景下切换账号是最优选择，但单账号场景下设限流毫无意义（只会导致双重等待）。
 		if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) {
 			return s.handleSingleAccountRetryInPlace(p, resp, respBody, baseURL, waitDuration, modelName)
 		}
 		rateLimitDuration := waitDuration
 		if rateLimitDuration <= 0 {
 			rateLimitDuration = antigravityDefaultRateLimitDuration
@@ -237,7 +258,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 			}
 		}
-		// 所有重试都失败，限流当前模型并切换账号
+		// 所有重试都失败
 		rateLimitDuration := waitDuration
 		if rateLimitDuration <= 0 {
 			rateLimitDuration = antigravityDefaultRateLimitDuration
@@ -246,6 +267,22 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 		if retryBody == nil {
 			retryBody = respBody
 		}
 		// 单账号 503 退避重试模式：智能重试耗尽后不设限流、不切换账号，
 		// 直接返回 503 让 Handler 层的单账号退避循环做最终处理。
 		if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) {
 			log.Printf("%s status=%d smart_retry_exhausted_single_account attempts=%d model=%s account=%d body=%s (return 503 directly)",
 				p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200))
 			return &smartRetryResult{
 				action: smartRetryActionBreakWithResp,
 				resp: &http.Response{
 					StatusCode: resp.StatusCode,
 					Header:     resp.Header.Clone(),
 					Body:       io.NopCloser(bytes.NewReader(retryBody)),
 				},
 			}
 		}
 		log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)",
 			p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200))
@@ -280,17 +317,152 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 	return &smartRetryResult{action: smartRetryActionContinue}
 }
 // handleSingleAccountRetryInPlace 单账号 503 退避重试的原地重试逻辑。
 //
 // 在多账号场景下，收到 503 + 长 retryDelay（≥ 7s）时会设置模型限流 + 切换账号；
 // 但在单账号场景下，设限流毫无意义（因为切换回来的还是同一个账号，还要等限流过期）。
 // 此方法改为在 Service 层原地等待 + 重试，避免双重等待问题：
 //
 //	旧流程：Service 设限流 → Handler 退避等待 → Service 等限流过期 → 再请求（总耗时 = 退避 + 限流）
 //	新流程：Service 直接等 retryDelay → 重试 → 成功/再等 → 重试...（总耗时 ≈ 实际 retryDelay × 重试次数）
 //
 // 约束：
 //   - 单次等待不超过 antigravitySingleAccountSmartRetryMaxWait
 //   - 总累计等待不超过 antigravitySingleAccountSmartRetryTotalMaxWait
 //   - 最多重试 antigravitySingleAccountSmartRetryMaxAttempts 次
 func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace(
 	p antigravityRetryLoopParams,
 	resp *http.Response,
 	respBody []byte,
 	baseURL string,
 	waitDuration time.Duration,
 	modelName string,
 ) *smartRetryResult {
 	// 限制单次等待时间
 	if waitDuration > antigravitySingleAccountSmartRetryMaxWait {
 		waitDuration = antigravitySingleAccountSmartRetryMaxWait
 	}
 	if waitDuration < antigravitySmartRetryMinWait {
 		waitDuration = antigravitySmartRetryMinWait
 	}
 	log.Printf("%s status=%d single_account_503_retry_in_place model=%s account=%d upstream_retry_delay=%v (retrying in-place instead of rate-limiting)",
 		p.prefix, resp.StatusCode, modelName, p.account.ID, waitDuration)
 	var lastRetryResp *http.Response
 	var lastRetryBody []byte
 	totalWaited := time.Duration(0)
 	for attempt := 1; attempt <= antigravitySingleAccountSmartRetryMaxAttempts; attempt++ {
 		// 检查累计等待是否超限
 		if totalWaited+waitDuration > antigravitySingleAccountSmartRetryTotalMaxWait {
 			remaining := antigravitySingleAccountSmartRetryTotalMaxWait - totalWaited
 			if remaining <= 0 {
 				log.Printf("%s single_account_503_retry: total_wait_exceeded total=%v max=%v, giving up",
 					p.prefix, totalWaited, antigravitySingleAccountSmartRetryTotalMaxWait)
 				break
 			}
 			waitDuration = remaining
 		}
 		log.Printf("%s status=%d single_account_503_retry attempt=%d/%d delay=%v total_waited=%v model=%s account=%d",
 			p.prefix, resp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, waitDuration, totalWaited, modelName, p.account.ID)
 		select {
 		case <-p.ctx.Done():
 			log.Printf("%s status=context_canceled_during_single_account_retry", p.prefix)
 			return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
 		case <-time.After(waitDuration):
 		}
 		totalWaited += waitDuration
 		// 创建新请求
 		retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body)
 		if err != nil {
 			log.Printf("%s single_account_503_retry: request_build_failed error=%v", p.prefix, err)
 			break
 		}
 		retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
 		if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
 			log.Printf("%s status=%d single_account_503_retry_success attempt=%d/%d total_waited=%v",
 				p.prefix, retryResp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited)
 			// 关闭之前的响应
 			if lastRetryResp != nil {
 				_ = lastRetryResp.Body.Close()
 			}
 			return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
 		}
 		// 网络错误时继续重试
 		if retryErr != nil || retryResp == nil {
 			log.Printf("%s single_account_503_retry: network_error attempt=%d/%d error=%v",
 				p.prefix, attempt, antigravitySingleAccountSmartRetryMaxAttempts, retryErr)
 			continue
 		}
 		// 关闭之前的响应
 		if lastRetryResp != nil {
 			_ = lastRetryResp.Body.Close()
 		}
 		lastRetryResp = retryResp
 		lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
 		_ = retryResp.Body.Close()
 		// 解析新的重试信息，更新下次等待时间
 		if attempt < antigravitySingleAccountSmartRetryMaxAttempts && lastRetryBody != nil {
 			_, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody)
 			if newWaitDuration > 0 {
 				waitDuration = newWaitDuration
 				if waitDuration > antigravitySingleAccountSmartRetryMaxWait {
 					waitDuration = antigravitySingleAccountSmartRetryMaxWait
 				}
 				if waitDuration < antigravitySmartRetryMinWait {
 					waitDuration = antigravitySmartRetryMinWait
 				}
 			}
 		}
 	}
 	// 所有重试都失败，不设限流，直接返回 503
 	// Handler 层的单账号退避循环会做最终处理
 	retryBody := lastRetryBody
 	if retryBody == nil {
 		retryBody = respBody
 	}
 	log.Printf("%s status=%d single_account_503_retry_exhausted attempts=%d total_waited=%v model=%s account=%d body=%s (return 503 directly)",
 		p.prefix, resp.StatusCode, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited, modelName, p.account.ID, truncateForLog(retryBody, 200))
 	return &smartRetryResult{
 		action: smartRetryActionBreakWithResp,
 		resp: &http.Response{
 			StatusCode: resp.StatusCode,
 			Header:     resp.Header.Clone(),
 			Body:       io.NopCloser(bytes.NewReader(retryBody)),
 		},
 	}
 }
 // antigravityRetryLoop 执行带 URL fallback 的重试循环
 func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) {
 	// 预检查：如果账号已限流，直接返回切换信号
 	if p.requestedModel != "" {
 		if remaining := p.account.GetRateLimitRemainingTimeWithContext(p.ctx, p.requestedModel); remaining > 0 {
-			log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d",
+			// 单账号 503 退避重试模式：跳过限流预检查，直接发请求。
-				p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
+			// 首次请求设的限流是为了多账号调度器跳过该账号，在单账号模式下无意义。
-			return nil, &AntigravityAccountSwitchError{
+			// 如果上游确实还不可用，handleSmartRetry → handleSingleAccountRetryInPlace
-				OriginalAccountID: p.account.ID,
+			// 会在 Service 层原地等待+重试，不需要在预检查这里等。
-				RateLimitedModel:  p.requestedModel,
+			if isSingleAccountRetry(p.ctx) {
-				IsStickySession:   p.isStickySession,
+				log.Printf("%s pre_check: single_account_retry skipping rate_limit remaining=%v model=%s account=%d (will retry in-place if 503)",
 					p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
 			} else {
 				log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d",
 					p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
 				return nil, &AntigravityAccountSwitchError{
 					OriginalAccountID: p.account.ID,
 					RateLimitedModel:  p.requestedModel,
 					IsStickySession:   p.isStickySession,
 				}
 			}
 		}
 	}
@@ -372,12 +544,12 @@ urlFallbackLoop:
 				_ = resp.Body.Close()
 				// ★ 统一入口：自定义错误码 + 临时不可调度
-				if handled, policyErr := s.applyErrorPolicy(p, resp.StatusCode, resp.Header, respBody); handled {
+				if handled, outStatus, policyErr := s.applyErrorPolicy(p, resp.StatusCode, resp.Header, respBody); handled {
 					if policyErr != nil {
 						return nil, policyErr
 					}
 					resp = &http.Response{
-						StatusCode: resp.StatusCode,
+						StatusCode: outStatus,
 						Header:     resp.Header.Clone(),
 						Body:       io.NopCloser(bytes.NewReader(respBody)),
 					}
@@ -611,21 +783,22 @@ func (s *AntigravityGatewayService) checkErrorPolicy(ctx context.Context, accoun
 	return s.rateLimitService.CheckErrorPolicy(ctx, account, statusCode, body)
 }
-// applyErrorPolicy 应用错误策略结果，返回是否应终止当前循环
+// applyErrorPolicy 应用错误策略结果，返回是否应终止当前循环及应返回的状态码。
-func (s *AntigravityGatewayService) applyErrorPolicy(p antigravityRetryLoopParams, statusCode int, headers http.Header, respBody []byte) (handled bool, retErr error) {
+// ErrorPolicySkipped 时 outStatus 为 500（前端约定：未命中的错误返回 500）。
 func (s *AntigravityGatewayService) applyErrorPolicy(p antigravityRetryLoopParams, statusCode int, headers http.Header, respBody []byte) (handled bool, outStatus int, retErr error) {
 	switch s.checkErrorPolicy(p.ctx, p.account, statusCode, respBody) {
 	case ErrorPolicySkipped:
-		return true, nil
+		return true, http.StatusInternalServerError, nil
 	case ErrorPolicyMatched:
 		_ = p.handleError(p.ctx, p.prefix, p.account, statusCode, headers, respBody,
 			p.requestedModel, p.groupID, p.sessionHash, p.isStickySession)
-		return true, nil
+		return true, statusCode, nil
 	case ErrorPolicyTempUnscheduled:
 		slog.Info("temp_unschedulable_matched",
 			"prefix", p.prefix, "status_code", statusCode, "account_id", p.account.ID)
-		return true, &AntigravityAccountSwitchError{OriginalAccountID: p.account.ID, IsStickySession: p.isStickySession}
+		return true, statusCode, &AntigravityAccountSwitchError{OriginalAccountID: p.account.ID, IsStickySession: p.isStickySession}
 	}
-	return false, nil
+	return false, statusCode, nil
 }
 // mapAntigravityModel 获取映射后的模型名
@@ -1940,6 +2113,12 @@ func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool {
 	}
 }
 // isSingleAccountRetry 检查 context 中是否设置了单账号退避重试标记
 func isSingleAccountRetry(ctx context.Context) bool {
 	v, _ := ctx.Value(ctxkey.SingleAccountRetry).(bool)
 	return v
 }
 // setModelRateLimitByModelName 使用官方模型 ID 设置模型级限流
 // 直接使用上游返回的模型 ID（如 claude-sonnet-4-5）作为限流 key
 // 返回是否已成功设置（若模型名为空或 repo 为 nil 将返回 false）
@@ -2239,6 +2418,10 @@ func (s *AntigravityGatewayService) handleUpstreamError(
 	requestedModel string,
 	groupID int64, sessionHash string, isStickySession bool,
 ) *handleModelRateLimitResult {
 	// 遵守自定义错误码策略：未命中则跳过所有限流处理
 	if !account.ShouldHandleErrorCode(statusCode) {
 		return nil
 	}
 	// 模型级限流处理（优先）
 	result := s.handleModelRateLimit(&handleModelRateLimitParams{
 		ctx:             ctx,
--- a/backend/internal/service/antigravity_single_account_retry_test.go
+++ b/backend/internal/service/antigravity_single_account_retry_test.go
@@ -0,0 +1,902 @@
 //go:build unit
 package service
 import (
 	"bytes"
 	"context"
 	"io"
 	"net/http"
 	"strings"
 	"testing"
 	"time"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
 	"github.com/stretchr/testify/require"
 )
 // ---------------------------------------------------------------------------
 // 辅助函数：构造带 SingleAccountRetry 标记的 context
 // ---------------------------------------------------------------------------
 func ctxWithSingleAccountRetry() context.Context {
 	return context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true)
 }
 // ---------------------------------------------------------------------------
 // 1. isSingleAccountRetry 测试
 // ---------------------------------------------------------------------------
 func TestIsSingleAccountRetry_True(t *testing.T) {
 	ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true)
 	require.True(t, isSingleAccountRetry(ctx))
 }
 func TestIsSingleAccountRetry_False_NoValue(t *testing.T) {
 	require.False(t, isSingleAccountRetry(context.Background()))
 }
 func TestIsSingleAccountRetry_False_ExplicitFalse(t *testing.T) {
 	ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, false)
 	require.False(t, isSingleAccountRetry(ctx))
 }
 func TestIsSingleAccountRetry_False_WrongType(t *testing.T) {
 	ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, "true")
 	require.False(t, isSingleAccountRetry(ctx))
 }
 // ---------------------------------------------------------------------------
 // 2. 常量验证
 // ---------------------------------------------------------------------------
 func TestSingleAccountRetryConstants(t *testing.T) {
 	require.Equal(t, 3, antigravitySingleAccountSmartRetryMaxAttempts,
 		"单账号原地重试最多 3 次")
 	require.Equal(t, 15*time.Second, antigravitySingleAccountSmartRetryMaxWait,
 		"单次最大等待 15s")
 	require.Equal(t, 30*time.Second, antigravitySingleAccountSmartRetryTotalMaxWait,
 		"总累计等待不超过 30s")
 }
 // ---------------------------------------------------------------------------
 // 3. handleSmartRetry + 503 + SingleAccountRetry → 走 handleSingleAccountRetryInPlace
 //    （而非设模型限流 + 切换账号）
 // ---------------------------------------------------------------------------
 // TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace
 // 核心场景：503 + retryDelay >= 7s + SingleAccountRetry 标记
 // → 不设模型限流、不切换账号，改为原地重试
 func TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace(t *testing.T) {
 	// 原地重试成功
 	successResp := &http.Response{
 		StatusCode: http.StatusOK,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
 	}
 	upstream := &mockSmartRetryUpstream{
 		responses: []*http.Response{successResp},
 		errors:    []error{nil},
 	}
 	repo := &stubAntigravityAccountRepo{}
 	account := &Account{
 		ID:          1,
 		Name:        "acc-single",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Concurrency: 1,
 	}
 	// 503 + 39s >= 7s 阈值 + MODEL_CAPACITY_EXHAUSTED
 	respBody := []byte(`{
 		"error": {
 			"code": 503,
 			"status": "UNAVAILABLE",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
 			],
 			"message": "No capacity available for model gemini-3-pro-high on the server"
 		}
 	}`)
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 		Body:       io.NopCloser(bytes.NewReader(respBody)),
 	}
 	params := antigravityRetryLoopParams{
 		ctx:          ctxWithSingleAccountRetry(), // 关键：设置单账号标记
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 		accountRepo:  repo,
 		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
 	availableURLs := []string{"https://ag-1.test"}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	// 关键断言：返回 resp（原地重试成功），而非 switchError（切换账号）
 	require.NotNil(t, result.resp, "should return successful response from in-place retry")
 	require.Equal(t, http.StatusOK, result.resp.StatusCode)
 	require.Nil(t, result.switchError, "should NOT return switchError in single account mode")
 	require.Nil(t, result.err)
 	// 验证未设模型限流（单账号模式不应设限流）
 	require.Len(t, repo.modelRateLimitCalls, 0,
 		"should NOT set model rate limit in single account retry mode")
 	// 验证确实调用了 upstream（原地重试）
 	require.GreaterOrEqual(t, len(upstream.calls), 1, "should have made at least one retry call")
 }
 // TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches
 // 对照组：503 + retryDelay >= 7s + 无 SingleAccountRetry 标记
 // → 照常设模型限流 + 切换账号
 func TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches(t *testing.T) {
 	repo := &stubAntigravityAccountRepo{}
 	account := &Account{
 		ID:       2,
 		Name:     "acc-multi",
 		Type:     AccountTypeOAuth,
 		Platform: PlatformAntigravity,
 	}
 	// 503 + 39s >= 7s 阈值
 	respBody := []byte(`{
 		"error": {
 			"code": 503,
 			"status": "UNAVAILABLE",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
 			]
 		}
 	}`)
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 		Body:       io.NopCloser(bytes.NewReader(respBody)),
 	}
 	params := antigravityRetryLoopParams{
 		ctx:         context.Background(), // 关键：无单账号标记
 		prefix:      "[test]",
 		account:     account,
 		accessToken: "token",
 		action:      "generateContent",
 		body:        []byte(`{"input":"test"}`),
 		accountRepo: repo,
 		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
 	availableURLs := []string{"https://ag-1.test"}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	// 对照：多账号模式返回 switchError
 	require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503")
 	require.Nil(t, result.resp, "should not return resp when switchError is set")
 	// 对照：多账号模式应设模型限流
 	require.Len(t, repo.modelRateLimitCalls, 1,
 		"multi-account mode SHOULD set model rate limit")
 }
 // TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches
 // 边界情况：429（非 503）+ SingleAccountRetry 标记
 // → 单账号原地重试仅针对 503，429 依然走切换账号逻辑
 func TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches(t *testing.T) {
 	repo := &stubAntigravityAccountRepo{}
 	account := &Account{
 		ID:       3,
 		Name:     "acc-429",
 		Type:     AccountTypeOAuth,
 		Platform: PlatformAntigravity,
 	}
 	// 429 + 15s >= 7s 阈值
 	respBody := []byte(`{
 		"error": {
 			"status": "RESOURCE_EXHAUSTED",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "15s"}
 			]
 		}
 	}`)
 	resp := &http.Response{
 		StatusCode: http.StatusTooManyRequests, // 429，不是 503
 		Header:     http.Header{},
 		Body:       io.NopCloser(bytes.NewReader(respBody)),
 	}
 	params := antigravityRetryLoopParams{
 		ctx:         ctxWithSingleAccountRetry(), // 有单账号标记
 		prefix:      "[test]",
 		account:     account,
 		accessToken: "token",
 		action:      "generateContent",
 		body:        []byte(`{"input":"test"}`),
 		accountRepo: repo,
 		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
 	availableURLs := []string{"https://ag-1.test"}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	// 429 即使有单账号标记，也应走切换账号
 	require.NotNil(t, result.switchError, "429 should still return switchError even with SingleAccountRetry")
 	require.Len(t, repo.modelRateLimitCalls, 1,
 		"429 should still set model rate limit even with SingleAccountRetry")
 }
 // ---------------------------------------------------------------------------
 // 4. handleSmartRetry + 503 + 短延迟 + SingleAccountRetry → 智能重试耗尽后不设限流
 // ---------------------------------------------------------------------------
 // TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit
 // 503 + retryDelay < 7s + SingleAccountRetry → 智能重试耗尽后直接返回 503，不设限流
 func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testing.T) {
 	// 智能重试也返回 503
 	failRespBody := `{
 		"error": {
 			"code": 503,
 			"status": "UNAVAILABLE",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
 			]
 		}
 	}`
 	failResp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(failRespBody)),
 	}
 	upstream := &mockSmartRetryUpstream{
 		responses: []*http.Response{failResp},
 		errors:    []error{nil},
 	}
 	repo := &stubAntigravityAccountRepo{}
 	account := &Account{
 		ID:       4,
 		Name:     "acc-short-503",
 		Type:     AccountTypeOAuth,
 		Platform: PlatformAntigravity,
 	}
 	// 0.1s < 7s 阈值
 	respBody := []byte(`{
 		"error": {
 			"code": 503,
 			"status": "UNAVAILABLE",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
 			]
 		}
 	}`)
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 		Body:       io.NopCloser(bytes.NewReader(respBody)),
 	}
 	params := antigravityRetryLoopParams{
 		ctx:          ctxWithSingleAccountRetry(),
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 		accountRepo:  repo,
 		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
 	availableURLs := []string{"https://ag-1.test"}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	// 关键断言：单账号 503 模式下，智能重试耗尽后直接返回 503 响应，不切换
 	require.NotNil(t, result.resp, "should return 503 response directly for single account mode")
 	require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode)
 	require.Nil(t, result.switchError, "should NOT switch account in single account mode")
 	// 关键断言：不设模型限流
 	require.Len(t, repo.modelRateLimitCalls, 0,
 		"should NOT set model rate limit for 503 in single account mode")
 }
 // TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit
 // 对照组：503 + retryDelay < 7s + 无 SingleAccountRetry → 智能重试耗尽后照常设限流
 func TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit(t *testing.T) {
 	failRespBody := `{
 		"error": {
 			"code": 503,
 			"status": "UNAVAILABLE",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
 			]
 		}
 	}`
 	failResp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(failRespBody)),
 	}
 	upstream := &mockSmartRetryUpstream{
 		responses: []*http.Response{failResp},
 		errors:    []error{nil},
 	}
 	repo := &stubAntigravityAccountRepo{}
 	account := &Account{
 		ID:       5,
 		Name:     "acc-multi-503",
 		Type:     AccountTypeOAuth,
 		Platform: PlatformAntigravity,
 	}
 	respBody := []byte(`{
 		"error": {
 			"code": 503,
 			"status": "UNAVAILABLE",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
 			]
 		}
 	}`)
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 		Body:       io.NopCloser(bytes.NewReader(respBody)),
 	}
 	params := antigravityRetryLoopParams{
 		ctx:          context.Background(), // 无单账号标记
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 		accountRepo:  repo,
 		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
 	availableURLs := []string{"https://ag-1.test"}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	// 对照：多账号模式应返回 switchError
 	require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503")
 	// 对照：多账号模式应设模型限流
 	require.Len(t, repo.modelRateLimitCalls, 1,
 		"multi-account mode should set model rate limit")
 }
 // ---------------------------------------------------------------------------
 // 5. handleSingleAccountRetryInPlace 直接测试
 // ---------------------------------------------------------------------------
 // TestHandleSingleAccountRetryInPlace_Success 原地重试成功
 func TestHandleSingleAccountRetryInPlace_Success(t *testing.T) {
 	successResp := &http.Response{
 		StatusCode: http.StatusOK,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
 	}
 	upstream := &mockSmartRetryUpstream{
 		responses: []*http.Response{successResp},
 		errors:    []error{nil},
 	}
 	account := &Account{
 		ID:          10,
 		Name:        "acc-inplace-ok",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Concurrency: 1,
 	}
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 	}
 	params := antigravityRetryLoopParams{
 		ctx:          ctxWithSingleAccountRetry(),
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 	}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	require.NotNil(t, result.resp, "should return successful response")
 	require.Equal(t, http.StatusOK, result.resp.StatusCode)
 	require.Nil(t, result.switchError, "should not switch account on success")
 	require.Nil(t, result.err)
 }
 // TestHandleSingleAccountRetryInPlace_AllRetriesFail 所有重试都失败，返回 503（不设限流）
 func TestHandleSingleAccountRetryInPlace_AllRetriesFail(t *testing.T) {
 	// 构造 3 个 503 响应（对应 3 次原地重试）
 	var responses []*http.Response
 	var errors []error
 	for i := 0; i < antigravitySingleAccountSmartRetryMaxAttempts; i++ {
 		responses = append(responses, &http.Response{
 			StatusCode: http.StatusServiceUnavailable,
 			Header:     http.Header{},
 			Body: io.NopCloser(strings.NewReader(`{
 				"error": {
 					"code": 503,
 					"status": "UNAVAILABLE",
 					"details": [
 						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
 					]
 				}
 			}`)),
 		})
 		errors = append(errors, nil)
 	}
 	upstream := &mockSmartRetryUpstream{
 		responses: responses,
 		errors:    errors,
 	}
 	account := &Account{
 		ID:          11,
 		Name:        "acc-inplace-fail",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Concurrency: 1,
 	}
 	origBody := []byte(`{"error":{"code":503,"status":"UNAVAILABLE"}}`)
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{"X-Test": {"original"}},
 	}
 	params := antigravityRetryLoopParams{
 		ctx:          ctxWithSingleAccountRetry(),
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 	}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSingleAccountRetryInPlace(params, resp, origBody, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	// 关键：返回 503 resp，不返回 switchError
 	require.NotNil(t, result.resp, "should return 503 response directly")
 	require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode)
 	require.Nil(t, result.switchError, "should NOT return switchError - let Handler handle it")
 	require.Nil(t, result.err)
 	// 验证确实重试了指定次数
 	require.Len(t, upstream.calls, antigravitySingleAccountSmartRetryMaxAttempts,
 		"should have made exactly maxAttempts retry calls")
 }
 // TestHandleSingleAccountRetryInPlace_WaitDurationClamped 等待时间被限制在 [min, max] 范围
 func TestHandleSingleAccountRetryInPlace_WaitDurationClamped(t *testing.T) {
 	// 用短延迟的成功响应，只验证不 panic
 	successResp := &http.Response{
 		StatusCode: http.StatusOK,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
 	}
 	upstream := &mockSmartRetryUpstream{
 		responses: []*http.Response{successResp},
 		errors:    []error{nil},
 	}
 	account := &Account{
 		ID:          12,
 		Name:        "acc-clamp",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Concurrency: 1,
 	}
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 	}
 	params := antigravityRetryLoopParams{
 		ctx:          ctxWithSingleAccountRetry(),
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 	}
 	svc := &AntigravityGatewayService{}
 	// 等待时间过大应被 clamp 到 antigravitySingleAccountSmartRetryMaxWait
 	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 999*time.Second, "gemini-3-pro")
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	require.NotNil(t, result.resp)
 	require.Equal(t, http.StatusOK, result.resp.StatusCode)
 }
 // TestHandleSingleAccountRetryInPlace_ContextCanceled context 取消时立即返回
 func TestHandleSingleAccountRetryInPlace_ContextCanceled(t *testing.T) {
 	upstream := &mockSmartRetryUpstream{
 		responses: []*http.Response{nil},
 		errors:    []error{nil},
 	}
 	account := &Account{
 		ID:          13,
 		Name:        "acc-cancel",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Concurrency: 1,
 	}
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 	}
 	ctx, cancel := context.WithCancel(context.Background())
 	ctx = context.WithValue(ctx, ctxkey.SingleAccountRetry, true)
 	cancel() // 立即取消
 	params := antigravityRetryLoopParams{
 		ctx:          ctx,
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 	}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	require.Error(t, result.err, "should return context error")
 	// 不应调用 upstream（因为在等待阶段就被取消了）
 	require.Len(t, upstream.calls, 0, "should not call upstream when context is canceled")
 }
 // TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry 网络错误时继续重试
 func TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry(t *testing.T) {
 	successResp := &http.Response{
 		StatusCode: http.StatusOK,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
 	}
 	upstream := &mockSmartRetryUpstream{
 		// 第1次网络错误（nil resp），第2次成功
 		responses: []*http.Response{nil, successResp},
 		errors:    []error{nil, nil},
 	}
 	account := &Account{
 		ID:          14,
 		Name:        "acc-net-retry",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Concurrency: 1,
 	}
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 	}
 	params := antigravityRetryLoopParams{
 		ctx:          ctxWithSingleAccountRetry(),
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 	}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	require.NotNil(t, result.resp, "should return successful response after network error recovery")
 	require.Equal(t, http.StatusOK, result.resp.StatusCode)
 	require.Len(t, upstream.calls, 2, "first call fails (network error), second succeeds")
 }
 // ---------------------------------------------------------------------------
 // 6. antigravityRetryLoop 预检查：单账号模式跳过限流
 // ---------------------------------------------------------------------------
 // TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit
 // 预检查中，如果有 SingleAccountRetry 标记，即使账号已限流也跳过直接发请求
 func TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit(t *testing.T) {
 	// 创建一个已设模型限流的账号
 	upstream := &recordingOKUpstream{}
 	account := &Account{
 		ID:          20,
 		Name:        "acc-rate-limited",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Schedulable: true,
 		Status:      StatusActive,
 		Concurrency: 1,
 		Extra: map[string]any{
 			modelRateLimitsKey: map[string]any{
 				"claude-sonnet-4-5": map[string]any{
 					"rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339),
 				},
 			},
 		},
 	}
 	svc := &AntigravityGatewayService{}
 	result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
 		ctx:            ctxWithSingleAccountRetry(),
 		prefix:         "[test]",
 		account:        account,
 		accessToken:    "token",
 		action:         "generateContent",
 		body:           []byte(`{"input":"test"}`),
 		httpUpstream:   upstream,
 		requestedModel: "claude-sonnet-4-5",
 		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	})
 	require.NoError(t, err, "should not return error")
 	require.NotNil(t, result, "should return result")
 	require.NotNil(t, result.resp, "should have response")
 	require.Equal(t, http.StatusOK, result.resp.StatusCode)
 	// 关键：尽管限流了，有 SingleAccountRetry 标记时仍然到达了 upstream
 	require.Equal(t, 1, upstream.calls, "should have reached upstream despite rate limit")
 }
 // TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit
 // 对照组：无 SingleAccountRetry + 已限流 → 预检查返回 switchError
 func TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit(t *testing.T) {
 	upstream := &recordingOKUpstream{}
 	account := &Account{
 		ID:          21,
 		Name:        "acc-rate-limited-multi",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Schedulable: true,
 		Status:      StatusActive,
 		Concurrency: 1,
 		Extra: map[string]any{
 			modelRateLimitsKey: map[string]any{
 				"claude-sonnet-4-5": map[string]any{
 					"rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339),
 				},
 			},
 		},
 	}
 	svc := &AntigravityGatewayService{}
 	result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
 		ctx:            context.Background(), // 无单账号标记
 		prefix:         "[test]",
 		account:        account,
 		accessToken:    "token",
 		action:         "generateContent",
 		body:           []byte(`{"input":"test"}`),
 		httpUpstream:   upstream,
 		requestedModel: "claude-sonnet-4-5",
 		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	})
 	require.Nil(t, result, "should not return result on rate limit switch")
 	require.NotNil(t, err, "should return error")
 	var switchErr *AntigravityAccountSwitchError
 	require.ErrorAs(t, err, &switchErr, "should return AntigravityAccountSwitchError")
 	require.Equal(t, account.ID, switchErr.OriginalAccountID)
 	require.Equal(t, "claude-sonnet-4-5", switchErr.RateLimitedModel)
 	// upstream 不应被调用（预检查就短路了）
 	require.Equal(t, 0, upstream.calls, "upstream should NOT be called when pre-check blocks")
 }
 // ---------------------------------------------------------------------------
 // 7. 端到端集成场景测试
 // ---------------------------------------------------------------------------
 // TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E
 // 端到端场景：503 + 单账号 + 原地重试第2次成功
 func TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E(t *testing.T) {
 	// 第1次原地重试仍返回 503，第2次成功
 	fail503Body := `{
 		"error": {
 			"code": 503,
 			"status": "UNAVAILABLE",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
 			]
 		}
 	}`
 	resp503 := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(fail503Body)),
 	}
 	successResp := &http.Response{
 		StatusCode: http.StatusOK,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
 	}
 	upstream := &mockSmartRetryUpstream{
 		responses: []*http.Response{resp503, successResp},
 		errors:    []error{nil, nil},
 	}
 	account := &Account{
 		ID:          30,
 		Name:        "acc-e2e",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Concurrency: 1,
 	}
 	resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 	}
 	params := antigravityRetryLoopParams{
 		ctx:          ctxWithSingleAccountRetry(),
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 	}
 	svc := &AntigravityGatewayService{}
 	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
 	require.NotNil(t, result)
 	require.Equal(t, smartRetryActionBreakWithResp, result.action)
 	require.NotNil(t, result.resp, "should return successful response after 2nd attempt")
 	require.Equal(t, http.StatusOK, result.resp.StatusCode)
 	require.Nil(t, result.switchError)
 	require.Len(t, upstream.calls, 2, "first 503, second OK")
 }
 // TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E
 // 通过 antigravityRetryLoop → handleSmartRetry → handleSingleAccountRetryInPlace 完整链路
 func TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E(t *testing.T) {
 	// 初始请求返回 503 + 长延迟
 	initial503Body := []byte(`{
 		"error": {
 			"code": 503,
 			"status": "UNAVAILABLE",
 			"details": [
 				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
 				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "10s"}
 			],
 			"message": "No capacity available"
 		}
 	}`)
 	initial503Resp := &http.Response{
 		StatusCode: http.StatusServiceUnavailable,
 		Header:     http.Header{},
 		Body:       io.NopCloser(bytes.NewReader(initial503Body)),
 	}
 	// 原地重试成功
 	successResp := &http.Response{
 		StatusCode: http.StatusOK,
 		Header:     http.Header{},
 		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
 	}
 	upstream := &mockSmartRetryUpstream{
 		// 第1次调用（retryLoop 主循环）返回 503
 		// 第2次调用（handleSingleAccountRetryInPlace 原地重试）返回 200
 		responses: []*http.Response{initial503Resp, successResp},
 		errors:    []error{nil, nil},
 	}
 	repo := &stubAntigravityAccountRepo{}
 	account := &Account{
 		ID:          31,
 		Name:        "acc-e2e-loop",
 		Type:        AccountTypeOAuth,
 		Platform:    PlatformAntigravity,
 		Schedulable: true,
 		Status:      StatusActive,
 		Concurrency: 1,
 	}
 	svc := &AntigravityGatewayService{}
 	result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
 		ctx:          ctxWithSingleAccountRetry(),
 		prefix:       "[test]",
 		account:      account,
 		accessToken:  "token",
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 		accountRepo:  repo,
 		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	})
 	require.NoError(t, err, "should not return error on successful retry")
 	require.NotNil(t, result, "should return result")
 	require.NotNil(t, result.resp, "should return response")
 	require.Equal(t, http.StatusOK, result.resp.StatusCode)
 	// 验证未设模型限流
 	require.Len(t, repo.modelRateLimitCalls, 0,
 		"should NOT set model rate limit in single account retry mode")
 }
--- a/backend/internal/service/error_policy_integration_test.go
+++ b/backend/internal/service/error_policy_integration_test.go
@@ -116,7 +116,7 @@ func TestRetryLoop_ErrorPolicy_CustomErrorCodes(t *testing.T) {
 			customCodes:       []any{float64(500)},
 			expectHandleError: 0,
 			expectUpstream:    1,
-			expectStatusCode:  429,
+			expectStatusCode:  500,
 		},
 		{
 			name:              "500_in_custom_codes_matched",
@@ -364,3 +364,109 @@ func TestRetryLoop_ErrorPolicy_NoPolicy_OriginalBehavior(t *testing.T) {
 	require.Equal(t, antigravityMaxRetries, upstream.calls, "should exhaust all retries")
 	require.Equal(t, 1, handleErrorCount, "handleError should be called once after retries exhausted")
 }
 // ---------------------------------------------------------------------------
 // epTrackingRepo — records SetRateLimited / SetError calls for verification.
 // ---------------------------------------------------------------------------
 type epTrackingRepo struct {
 	mockAccountRepoForGemini
 	rateLimitedCalls int
 	rateLimitedID    int64
 	setErrCalls      int
 	setErrID         int64
 	tempCalls        int
 }
 func (r *epTrackingRepo) SetRateLimited(_ context.Context, id int64, _ time.Time) error {
 	r.rateLimitedCalls++
 	r.rateLimitedID = id
 	return nil
 }
 func (r *epTrackingRepo) SetError(_ context.Context, id int64, _ string) error {
 	r.setErrCalls++
 	r.setErrID = id
 	return nil
 }
 func (r *epTrackingRepo) SetTempUnschedulable(_ context.Context, _ int64, _ time.Time, _ string) error {
 	r.tempCalls++
 	return nil
 }
 // ---------------------------------------------------------------------------
 // TestCustomErrorCode599_SkippedErrors_Return500_NoRateLimit
 //
 // 核心场景：自定义错误码设为 [599]（一个不会真正出现的错误码），
 // 当上游返回 429/500/503/401 时：
 //   - 返回给客户端的状态码必须是 500（而不是透传原始状态码）
 //   - 不调用 SetRateLimited（不进入限流状态）
 //   - 不调用 SetError（不停止调度）
 //   - 不调用 handleError
 // ---------------------------------------------------------------------------
 func TestCustomErrorCode599_SkippedErrors_Return500_NoRateLimit(t *testing.T) {
 	errorCodes := []int{429, 500, 503, 401, 403}
 	for _, upstreamStatus := range errorCodes {
 		t.Run(http.StatusText(upstreamStatus), func(t *testing.T) {
 			saveAndSetBaseURLs(t)
 			upstream := &epFixedUpstream{
 				statusCode: upstreamStatus,
 				body:       `{"error":"some upstream error"}`,
 			}
 			repo := &epTrackingRepo{}
 			rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
 			svc := &AntigravityGatewayService{rateLimitService: rlSvc}
 			account := &Account{
 				ID:          500,
 				Type:        AccountTypeAPIKey,
 				Platform:    PlatformAntigravity,
 				Schedulable: true,
 				Status:      StatusActive,
 				Concurrency: 1,
 				Credentials: map[string]any{
 					"custom_error_codes_enabled": true,
 					"custom_error_codes":         []any{float64(599)},
 				},
 			}
 			var handleErrorCount int
 			p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
 				handleErrorCount++
 				return nil
 			})
 			result, err := svc.antigravityRetryLoop(p)
 			// 不应返回 error（Skipped 不触发账号切换）
 			require.NoError(t, err, "should not return error")
 			require.NotNil(t, result, "result should not be nil")
 			require.NotNil(t, result.resp, "response should not be nil")
 			defer func() { _ = result.resp.Body.Close() }()
 			// 状态码必须是 500（不透传原始状态码）
 			require.Equal(t, http.StatusInternalServerError, result.resp.StatusCode,
 				"skipped error should return 500, not %d", upstreamStatus)
 			// 不调用 handleError
 			require.Equal(t, 0, handleErrorCount,
 				"handleError should NOT be called for skipped errors")
 			// 不标记限流
 			require.Equal(t, 0, repo.rateLimitedCalls,
 				"SetRateLimited should NOT be called for skipped errors")
 			// 不停止调度
 			require.Equal(t, 0, repo.setErrCalls,
 				"SetError should NOT be called for skipped errors")
 			// 只调用一次上游（不重试）
 			require.Equal(t, 1, upstream.calls,
 				"should call upstream exactly once (no retry)")
 		})
 	}
 }
--- a/backend/internal/service/error_policy_test.go
+++ b/backend/internal/service/error_policy_test.go
@@ -158,6 +158,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 		statusCode        int
 		body              []byte
 		expectedHandled   bool
 		expectedStatus    int  // expected outStatus
 		expectedSwitchErr bool // expect *AntigravityAccountSwitchError
 		handleErrorCalls  int
 	}{
@@ -171,6 +172,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 			statusCode:       500,
 			body:             []byte(`"error"`),
 			expectedHandled:  false,
 			expectedStatus:   500, // passthrough
 			handleErrorCalls: 0,
 		},
 		{
@@ -187,6 +189,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 			statusCode:       500, // not in custom codes
 			body:             []byte(`"error"`),
 			expectedHandled:  true,
 			expectedStatus:   http.StatusInternalServerError, // skipped → 500
 			handleErrorCalls: 0,
 		},
 		{
@@ -203,6 +206,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 			statusCode:       500,
 			body:             []byte(`"error"`),
 			expectedHandled:  true,
 			expectedStatus:   500, // matched → original status
 			handleErrorCalls: 1,
 		},
 		{
@@ -225,6 +229,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 			statusCode:        503,
 			body:              []byte(`overloaded`),
 			expectedHandled:   true,
 			expectedStatus:    503, // temp_unscheduled → original status
 			expectedSwitchErr: true,
 			handleErrorCalls:  0,
 		},
@@ -250,9 +255,10 @@ func TestApplyErrorPolicy(t *testing.T) {
 				isStickySession: true,
 			}
-			handled, retErr := svc.applyErrorPolicy(p, tt.statusCode, http.Header{}, tt.body)
+			handled, outStatus, retErr := svc.applyErrorPolicy(p, tt.statusCode, http.Header{}, tt.body)
 			require.Equal(t, tt.expectedHandled, handled, "handled mismatch")
 			require.Equal(t, tt.expectedStatus, outStatus, "outStatus mismatch")
 			require.Equal(t, tt.handleErrorCalls, handleErrorCount, "handleError call count mismatch")
 			if tt.expectedSwitchErr {
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -243,6 +243,12 @@ var (
 	}
 )
 // systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表
 // OAuth/SetupToken 账号转发时，匹配这些前缀的 system 元素会被移除
 var systemBlockFilterPrefixes = []string{
 	"x-anthropic-billing-header",
 }
 // ErrClaudeCodeOnly 表示分组仅允许 Claude Code 客户端访问
 var ErrClaudeCodeOnly = errors.New("this group only allows Claude Code clients")
@@ -1687,6 +1693,17 @@ func (s *GatewayService) listSchedulableAccounts(ctx context.Context, groupID *i
 	return accounts, useMixed, nil
 }
 // IsSingleAntigravityAccountGroup 检查指定分组是否只有一个 antigravity 平台的可调度账号。
 // 用于 Handler 层在首次请求时提前设置 SingleAccountRetry context，
 // 避免单账号分组收到 503 时错误地设置模型限流标记导致后续请求连续快速失败。
 func (s *GatewayService) IsSingleAntigravityAccountGroup(ctx context.Context, groupID *int64) bool {
 	accounts, _, err := s.listSchedulableAccounts(ctx, groupID, PlatformAntigravity, true)
 	if err != nil {
 		return false
 	}
 	return len(accounts) == 1
 }
 func (s *GatewayService) isAccountAllowedForPlatform(account *Account, platform string, useMixed bool) bool {
 	if account == nil {
 		return false
@@ -2702,6 +2719,60 @@ func hasClaudeCodePrefix(text string) bool {
 	return false
 }
 // matchesFilterPrefix 检查文本是否匹配任一过滤前缀
 func matchesFilterPrefix(text string) bool {
 	for _, prefix := range systemBlockFilterPrefixes {
 		if strings.HasPrefix(text, prefix) {
 			return true
 		}
 	}
 	return false
 }
 // filterSystemBlocksByPrefix 从 body 的 system 中移除文本匹配 systemBlockFilterPrefixes 前缀的元素
 // 直接从 body 解析 system，不依赖外部传入的 parsed.System（因为前置步骤可能已修改 body 中的 system）
 func filterSystemBlocksByPrefix(body []byte) []byte {
 	sys := gjson.GetBytes(body, "system")
 	if !sys.Exists() {
 		return body
 	}
 	switch {
 	case sys.Type == gjson.String:
 		if matchesFilterPrefix(sys.Str) {
 			result, err := sjson.DeleteBytes(body, "system")
 			if err != nil {
 				return body
 			}
 			return result
 		}
 	case sys.IsArray():
 		var parsed []any
 		if err := json.Unmarshal([]byte(sys.Raw), &parsed); err != nil {
 			return body
 		}
 		filtered := make([]any, 0, len(parsed))
 		changed := false
 		for _, item := range parsed {
 			if m, ok := item.(map[string]any); ok {
 				if text, ok := m["text"].(string); ok && matchesFilterPrefix(text) {
 					changed = true
 					continue
 				}
 			}
 			filtered = append(filtered, item)
 		}
 		if changed {
 			result, err := sjson.SetBytes(body, "system", filtered)
 			if err != nil {
 				return body
 			}
 			return result
 		}
 	}
 	return body
 }
 // injectClaudeCodePrompt 在 system 开头注入 Claude Code 提示词
 // 处理 null、字符串、数组三种格式
 func injectClaudeCodePrompt(body []byte, system any) []byte {
@@ -2981,6 +3052,12 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 		body, reqModel = normalizeClaudeOAuthRequestBody(body, reqModel, normalizeOpts)
 	}
 	// OAuth/SetupToken 账号：移除黑名单前缀匹配的 system 元素（如客户端注入的计费元数据）
 	// 放在 inject/normalize 之后，确保不会被覆盖
 	if account.IsOAuth() {
 		body = filterSystemBlocksByPrefix(body)
 	}
 	// 强制执行 cache_control 块数量限制（最多 4 个）
 	body = enforceCacheControlLimit(body)
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -771,6 +771,14 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
 			break
 		}
 		// 错误策略优先：匹配则跳过重试直接处理。
 		if matched, rebuilt := s.checkErrorPolicyInLoop(ctx, account, resp); matched {
 			resp = rebuilt
 			break
 		} else {
 			resp = rebuilt
 		}
 		if resp.StatusCode >= 400 && s.shouldRetryGeminiUpstreamError(account, resp.StatusCode) {
 			respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
 			_ = resp.Body.Close()
@@ -840,7 +848,7 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
 				if upstreamReqID == "" {
 					upstreamReqID = resp.Header.Get("x-goog-request-id")
 				}
-				return nil, s.writeGeminiMappedError(c, account, resp.StatusCode, upstreamReqID, respBody)
+				return nil, s.writeGeminiMappedError(c, account, http.StatusInternalServerError, upstreamReqID, respBody)
 			case ErrorPolicyMatched, ErrorPolicyTempUnscheduled:
 				s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
 				upstreamReqID := resp.Header.Get(requestIDHeader)
@@ -1178,6 +1186,14 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 			return nil, s.writeGoogleError(c, http.StatusBadGateway, "Upstream request failed after retries: "+safeErr)
 		}
 		// 错误策略优先：匹配则跳过重试直接处理。
 		if matched, rebuilt := s.checkErrorPolicyInLoop(ctx, account, resp); matched {
 			resp = rebuilt
 			break
 		} else {
 			resp = rebuilt
 		}
 		if resp.StatusCode >= 400 && s.shouldRetryGeminiUpstreamError(account, resp.StatusCode) {
 			respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
 			_ = resp.Body.Close()
@@ -1285,7 +1301,7 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 				if contentType == "" {
 					contentType = "application/json"
 				}
-				c.Data(resp.StatusCode, contentType, respBody)
+				c.Data(http.StatusInternalServerError, contentType, respBody)
 				return nil, fmt.Errorf("gemini upstream error: %d (skipped by error policy)", resp.StatusCode)
 			case ErrorPolicyMatched, ErrorPolicyTempUnscheduled:
 				s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
@@ -1427,6 +1443,26 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 	}, nil
 }
 // checkErrorPolicyInLoop 在重试循环内预检查错误策略。
 // 返回 true 表示策略已匹配（调用者应 break），resp 已重建可直接使用。
 // 返回 false 表示 ErrorPolicyNone，resp 已重建，调用者继续走重试逻辑。
 func (s *GeminiMessagesCompatService) checkErrorPolicyInLoop(
 	ctx context.Context, account *Account, resp *http.Response,
 ) (matched bool, rebuilt *http.Response) {
 	if resp.StatusCode < 400 || s.rateLimitService == nil {
 		return false, resp
 	}
 	body, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
 	_ = resp.Body.Close()
 	rebuilt = &http.Response{
 		StatusCode: resp.StatusCode,
 		Header:     resp.Header.Clone(),
 		Body:       io.NopCloser(bytes.NewReader(body)),
 	}
 	policy := s.rateLimitService.CheckErrorPolicy(ctx, account, resp.StatusCode, body)
 	return policy != ErrorPolicyNone, rebuilt
 }
 func (s *GeminiMessagesCompatService) shouldRetryGeminiUpstreamError(account *Account, statusCode int) bool {
 	switch statusCode {
 	case 429, 500, 502, 503, 504, 529:
@@ -2576,6 +2612,10 @@ func asInt(v any) (int, bool) {
 }
 func (s *GeminiMessagesCompatService) handleGeminiUpstreamError(ctx context.Context, account *Account, statusCode int, headers http.Header, body []byte) {
 	// 遵守自定义错误码策略：未命中则跳过所有限流处理
 	if !account.ShouldHandleErrorCode(statusCode) {
 		return
 	}
 	if s.rateLimitService != nil && (statusCode == 401 || statusCode == 403 || statusCode == 529) {
 		s.rateLimitService.HandleUpstreamError(ctx, account, statusCode, headers, body)
 		return
--- a/backend/internal/service/ratelimit_service.go
+++ b/backend/internal/service/ratelimit_service.go
@@ -623,6 +623,10 @@ func (s *RateLimitService) ClearTempUnschedulable(ctx context.Context, accountID
 			slog.Warn("temp_unsched_cache_delete_failed", "account_id", accountID, "error", err)
 		}
 	}
 	// 同时清除模型级别限流
 	if err := s.accountRepo.ClearModelRateLimits(ctx, accountID); err != nil {
 		slog.Warn("clear_model_rate_limits_on_temp_unsched_reset_failed", "account_id", accountID, "error", err)
 	}
 	return nil
 }
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -17,7 +17,7 @@
  "dependencies": {
    "@lobehub/icons": "^4.0.2",
    "@vueuse/core": "^10.7.0",
-    "axios": "^1.6.2",
+    "axios": "^1.13.5",
    "chart.js": "^4.4.1",
    "dompurify": "^3.3.1",
    "driver.js": "^1.4.0",
--- a/frontend/pnpm-lock.yaml
+++ b/frontend/pnpm-lock.yaml
@@ -15,8 +15,8 @@ importers:
        specifier: ^10.7.0
        version: 10.11.1(vue@3.5.26(typescript@5.6.3))
      axios:
-        specifier: ^1.6.2
+        specifier: ^1.13.5
-        version: 1.13.2
+        version: 1.13.5
      chart.js:
        specifier: ^4.4.1
        version: 4.5.1
@@ -1257,56 +1257,67 @@ packages:
    resolution: {integrity: sha512-EHMUcDwhtdRGlXZsGSIuXSYwD5kOT9NVnx9sqzYiwAc91wfYOE1g1djOEDseZJKKqtHAHGwnGPQu3kytmfaXLQ==}
    cpu: [arm]
    os: [linux]
    libc: [glibc]
  '@rollup/rollup-linux-arm-musleabihf@4.54.0':
    resolution: {integrity: sha512-+pBrqEjaakN2ySv5RVrj/qLytYhPKEUwk+e3SFU5jTLHIcAtqh2rLrd/OkbNuHJpsBgxsD8ccJt5ga/SeG0JmA==}
    cpu: [arm]
    os: [linux]
    libc: [musl]
  '@rollup/rollup-linux-arm64-gnu@4.54.0':
    resolution: {integrity: sha512-NSqc7rE9wuUaRBsBp5ckQ5CVz5aIRKCwsoa6WMF7G01sX3/qHUw/z4pv+D+ahL1EIKy6Enpcnz1RY8pf7bjwng==}
    cpu: [arm64]
    os: [linux]
    libc: [glibc]
  '@rollup/rollup-linux-arm64-musl@4.54.0':
    resolution: {integrity: sha512-gr5vDbg3Bakga5kbdpqx81m2n9IX8M6gIMlQQIXiLTNeQW6CucvuInJ91EuCJ/JYvc+rcLLsDFcfAD1K7fMofg==}
    cpu: [arm64]
    os: [linux]
    libc: [musl]
  '@rollup/rollup-linux-loong64-gnu@4.54.0':
    resolution: {integrity: sha512-gsrtB1NA3ZYj2vq0Rzkylo9ylCtW/PhpLEivlgWe0bpgtX5+9j9EZa0wtZiCjgu6zmSeZWyI/e2YRX1URozpIw==}
    cpu: [loong64]
    os: [linux]
    libc: [glibc]
  '@rollup/rollup-linux-ppc64-gnu@4.54.0':
    resolution: {integrity: sha512-y3qNOfTBStmFNq+t4s7Tmc9hW2ENtPg8FeUD/VShI7rKxNW7O4fFeaYbMsd3tpFlIg1Q8IapFgy7Q9i2BqeBvA==}
    cpu: [ppc64]
    os: [linux]
    libc: [glibc]
  '@rollup/rollup-linux-riscv64-gnu@4.54.0':
    resolution: {integrity: sha512-89sepv7h2lIVPsFma8iwmccN7Yjjtgz0Rj/Ou6fEqg3HDhpCa+Et+YSufy27i6b0Wav69Qv4WBNl3Rs6pwhebQ==}
    cpu: [riscv64]
    os: [linux]
    libc: [glibc]
  '@rollup/rollup-linux-riscv64-musl@4.54.0':
    resolution: {integrity: sha512-ZcU77ieh0M2Q8Ur7D5X7KvK+UxbXeDHwiOt/CPSBTI1fBmeDMivW0dPkdqkT4rOgDjrDDBUed9x4EgraIKoR2A==}
    cpu: [riscv64]
    os: [linux]
    libc: [musl]
  '@rollup/rollup-linux-s390x-gnu@4.54.0':
    resolution: {integrity: sha512-2AdWy5RdDF5+4YfG/YesGDDtbyJlC9LHmL6rZw6FurBJ5n4vFGupsOBGfwMRjBYH7qRQowT8D/U4LoSvVwOhSQ==}
    cpu: [s390x]
    os: [linux]
    libc: [glibc]
  '@rollup/rollup-linux-x64-gnu@4.54.0':
    resolution: {integrity: sha512-WGt5J8Ij/rvyqpFexxk3ffKqqbLf9AqrTBbWDk7ApGUzaIs6V+s2s84kAxklFwmMF/vBNGrVdYgbblCOFFezMQ==}
    cpu: [x64]
    os: [linux]
    libc: [glibc]
  '@rollup/rollup-linux-x64-musl@4.54.0':
    resolution: {integrity: sha512-JzQmb38ATzHjxlPHuTH6tE7ojnMKM2kYNzt44LO/jJi8BpceEC8QuXYA908n8r3CNuG/B3BV8VR3Hi1rYtmPiw==}
    cpu: [x64]
    os: [linux]
    libc: [musl]
  '@rollup/rollup-openharmony-arm64@4.54.0':
    resolution: {integrity: sha512-huT3fd0iC7jigGh7n3q/+lfPcXxBi+om/Rs3yiFxjvSxbSB6aohDFXbWvlspaqjeOh+hx7DDHS+5Es5qRkWkZg==}
@@ -1805,8 +1816,8 @@ packages:
    peerDependencies:
      postcss: ^8.1.0
-  axios@1.13.2:
+  axios@1.13.5:
-    resolution: {integrity: sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==}
+    resolution: {integrity: sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==}
  babel-plugin-macros@3.1.0:
    resolution: {integrity: sha512-Cg7TFGpIr01vOQNODXOOaGz2NpCU5gl8x1qJFbb6hbZxR7XrcE2vtbAsTAbJ7/xwJtUuJEw8K8Zr/AE0LHlesg==}
@@ -6387,7 +6398,7 @@ snapshots:
      postcss: 8.5.6
      postcss-value-parser: 4.2.0
-  axios@1.13.2:
+  axios@1.13.5:
    dependencies:
      follow-redirects: 1.15.11
      form-data: 4.0.5
--- a/frontend/src/components/account/CreateAccountModal.vue
+++ b/frontend/src/components/account/CreateAccountModal.vue
@@ -665,8 +665,8 @@
              <Icon name="cloud" size="sm" />
            </div>
            <div>
-              <span class="block text-sm font-medium text-gray-900 dark:text-white">{{ t('admin.accounts.types.upstream') }}</span>
+              <span class="block text-sm font-medium text-gray-900 dark:text-white">API Key</span>
-              <span class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.accounts.types.upstreamDesc') }}</span>
+              <span class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.accounts.types.antigravityApikey') }}</span>
            </div>
          </button>
        </div>
@@ -681,7 +681,7 @@
            type="text"
            required
            class="input"
-            placeholder="https://s.konstants.xyz"
+            placeholder="https://cloudcode-pa.googleapis.com"
          />
          <p class="input-hint">{{ t('admin.accounts.upstream.baseUrlHint') }}</p>
        </div>
@@ -816,8 +816,8 @@
        </div>
      </div>
-      <!-- API Key input (only for apikey type) -->
+      <!-- API Key input (only for apikey type, excluding Antigravity which has its own fields) -->
-      <div v-if="form.type === 'apikey'" class="space-y-4">
+      <div v-if="form.type === 'apikey' && form.platform !== 'antigravity'" class="space-y-4">
        <div>
          <label class="input-label">{{ t('admin.accounts.baseUrl') }}</label>
          <input
@@ -862,7 +862,7 @@
          <p class="input-hint">{{ t('admin.accounts.gemini.tier.aiStudioHint') }}</p>
        </div>
-        <!-- Model Restriction Section (不适用于 Gemini) -->
+        <!-- Model Restriction Section (不适用于 Gemini，Antigravity 已在上层条件排除) -->
        <div v-if="form.platform !== 'gemini'" class="border-t border-gray-200 pt-4 dark:border-dark-600">
          <label class="input-label">{{ t('admin.accounts.modelRestriction') }}</label>
--- a/frontend/src/components/account/EditAccountModal.vue
+++ b/frontend/src/components/account/EditAccountModal.vue
@@ -39,7 +39,9 @@
                ? 'https://api.openai.com'
                : account.platform === 'gemini'
                  ? 'https://generativelanguage.googleapis.com'
-                  : 'https://api.anthropic.com'
+                  : account.platform === 'antigravity'
                    ? 'https://cloudcode-pa.googleapis.com'
                    : 'https://api.anthropic.com'
            "
          />
          <p class="input-hint">{{ baseUrlHint }}</p>
@@ -55,14 +57,16 @@
                ? 'sk-proj-...'
                : account.platform === 'gemini'
                  ? 'AIza...'
-                  : 'sk-ant-...'
+                  : account.platform === 'antigravity'
                    ? 'sk-...'
                    : 'sk-ant-...'
            "
          />
          <p class="input-hint">{{ t('admin.accounts.leaveEmptyToKeep') }}</p>
        </div>
-        <!-- Model Restriction Section (不适用于 Gemini) -->
+        <!-- Model Restriction Section (不适用于 Gemini 和 Antigravity) -->
-        <div v-if="account.platform !== 'gemini'" class="border-t border-gray-200 pt-4 dark:border-dark-600">
+        <div v-if="account.platform !== 'gemini' && account.platform !== 'antigravity'" class="border-t border-gray-200 pt-4 dark:border-dark-600">
          <label class="input-label">{{ t('admin.accounts.modelRestriction') }}</label>
          <!-- Mode Toggle -->
@@ -372,7 +376,7 @@
            v-model="editBaseUrl"
            type="text"
            class="input"
-            placeholder="https://s.konstants.xyz"
+            placeholder="https://cloudcode-pa.googleapis.com"
          />
          <p class="input-hint">{{ t('admin.accounts.upstream.baseUrlHint') }}</p>
        </div>
--- a/frontend/src/components/admin/account/AccountActionMenu.vue
+++ b/frontend/src/components/admin/account/AccountActionMenu.vue
@@ -53,7 +53,19 @@ import type { Account } from '@/types'
 const props = defineProps<{ show: boolean; account: Account | null; position: { top: number; left: number } | null }>()
 const emit = defineEmits(['close', 'test', 'stats', 'reauth', 'refresh-token', 'reset-status', 'clear-rate-limit'])
 const { t } = useI18n()
-const isRateLimited = computed(() => props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date())
+const isRateLimited = computed(() => {
  if (props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date()) {
    return true
  }
  const modelLimits = (props.account?.extra as Record<string, unknown> | undefined)?.model_rate_limits as
    | Record<string, { rate_limit_reset_at: string }>
    | undefined
  if (modelLimits) {
    const now = new Date()
    return Object.values(modelLimits).some(info => new Date(info.rate_limit_reset_at) > now)
  }
  return false
 })
 const isOverloaded = computed(() => props.account?.overload_until && new Date(props.account.overload_until) > new Date())
 const handleKeydown = (event: KeyboardEvent) => {
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -1369,6 +1369,7 @@ export default {
        googleOauth: 'Google OAuth',
        codeAssist: 'Code Assist',
        antigravityOauth: 'Antigravity OAuth',
        antigravityApikey: 'Connect via Base URL + API Key',
        upstream: 'Upstream',
        upstreamDesc: 'Connect via Base URL + API Key'
      },
@@ -1642,7 +1643,7 @@ export default {
      // Upstream type
      upstream: {
        baseUrl: 'Upstream Base URL',
-        baseUrlHint: 'The address of the upstream Antigravity service, e.g., https://s.konstants.xyz',
+        baseUrlHint: 'The address of the upstream Antigravity service, e.g., https://cloudcode-pa.googleapis.com',
        apiKey: 'Upstream API Key',
        apiKeyHint: 'API Key for the upstream service',
        pleaseEnterBaseUrl: 'Please enter upstream Base URL',
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -1503,6 +1503,7 @@ export default {
        googleOauth: 'Google OAuth',
        codeAssist: 'Code Assist',
        antigravityOauth: 'Antigravity OAuth',
        antigravityApikey: '通过 Base URL + API Key 连接',
        upstream: '对接上游',
        upstreamDesc: '通过 Base URL + API Key 连接上游',
        api_key: 'API Key',
@@ -1788,7 +1789,7 @@ export default {
      // Upstream type
      upstream: {
        baseUrl: '上游 Base URL',
-        baseUrlHint: '上游 Antigravity 服务的地址，例如：https://s.konstants.xyz',
+        baseUrlHint: '上游 Antigravity 服务的地址，例如：https://cloudcode-pa.googleapis.com',
        apiKey: '上游 API Key',
        apiKeyHint: '上游服务的 API Key',
        pleaseEnterBaseUrl: '请输入上游 Base URL',