diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go
index af20e318..415a38de 100644
--- a/backend/internal/handler/gateway_handler.go
+++ b/backend/internal/handler/gateway_handler.go
@@ -240,6 +240,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 		var lastFailoverErr *service.UpstreamFailoverError
 		var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
 
+		// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
+		// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
+		if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) {
+			ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+			c.Request = c.Request.WithContext(ctx)
+		}
+
 		for {
 			selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, reqModel, failedAccountIDs, "") // Gemini 不使用会话限制
 			if err != nil {
@@ -248,6 +255,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
 					return
 				}
+				// Antigravity 单账号退避重试：分组内没有其他可用账号时，
+				// 对 503 错误不直接返回，而是清除排除列表、等待退避后重试同一个账号。
+				// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的，等几秒就能恢复。
+				if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
+					if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
+						log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
+						failedAccountIDs = make(map[int64]struct{})
+						// 设置 context 标记，让 Service 层预检查等待限流过期而非直接切换
+						ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+						c.Request = c.Request.WithContext(ctx)
+						continue
+					}
+				}
 				if lastFailoverErr != nil {
 					h.handleFailoverExhausted(c, lastFailoverErr, service.PlatformGemini, streamStarted)
 				} else {
@@ -397,6 +417,13 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 	}
 	fallbackUsed := false
 
+	// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
+	// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
+	if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), currentAPIKey.GroupID) {
+		ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+		c.Request = c.Request.WithContext(ctx)
+	}
+
 	for {
 		maxAccountSwitches := h.maxAccountSwitches
 		switchCount := 0
@@ -414,6 +441,19 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
 					h.handleStreamingAwareError(c, http.StatusServiceUnavailable, "api_error", "Service temporarily unavailable", streamStarted)
 					return
 				}
+				// Antigravity 单账号退避重试：分组内没有其他可用账号时，
+				// 对 503 错误不直接返回，而是清除排除列表、等待退避后重试同一个账号。
+				// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的，等几秒就能恢复。
+				if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
+					if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
+						log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
+						failedAccountIDs = make(map[int64]struct{})
+						// 设置 context 标记，让 Service 层预检查等待限流过期而非直接切换
+						ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+						c.Request = c.Request.WithContext(ctx)
+						continue
+					}
+				}
 				if lastFailoverErr != nil {
 					h.handleFailoverExhausted(c, lastFailoverErr, platform, streamStarted)
 				} else {
@@ -851,6 +891,27 @@ func sleepFailoverDelay(ctx context.Context, switchCount int) bool {
 	}
 }
 
+// sleepAntigravitySingleAccountBackoff Antigravity 平台单账号分组的 503 退避重试延时。
+// 当分组内只有一个可用账号且上游返回 503（MODEL_CAPACITY_EXHAUSTED）时使用，
+// 采用短固定延时策略。Service 层在 SingleAccountRetry 模式下已经做了充分的原地重试
+// （最多 3 次、总等待 30s），所以 Handler 层的退避只需短暂等待即可。
+// 返回 false 表示 context 已取消。
+func sleepAntigravitySingleAccountBackoff(ctx context.Context, retryCount int) bool {
+	// 固定短延时：2s
+	// Service 层已经在原地等待了足够长的时间（retryDelay × 重试次数），
+	// Handler 层只需短暂间隔后重新进入 Service 层即可。
+	const delay = 2 * time.Second
+
+	log.Printf("Antigravity single-account 503 backoff: waiting %v before retry (attempt %d)", delay, retryCount)
+
+	select {
+	case <-ctx.Done():
+		return false
+	case <-time.After(delay):
+		return true
+	}
+}
+
 func (h *GatewayHandler) handleFailoverExhausted(c *gin.Context, failoverErr *service.UpstreamFailoverError, platform string, streamStarted bool) {
 	statusCode := failoverErr.StatusCode
 	responseBody := failoverErr.ResponseBody
diff --git a/backend/internal/handler/gateway_handler_single_account_retry_test.go b/backend/internal/handler/gateway_handler_single_account_retry_test.go
new file mode 100644
index 00000000..96aa14c6
--- /dev/null
+++ b/backend/internal/handler/gateway_handler_single_account_retry_test.go
@@ -0,0 +1,51 @@
+package handler
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// sleepAntigravitySingleAccountBackoff 测试
+// ---------------------------------------------------------------------------
+
+func TestSleepAntigravitySingleAccountBackoff_ReturnsTrue(t *testing.T) {
+	ctx := context.Background()
+	start := time.Now()
+	ok := sleepAntigravitySingleAccountBackoff(ctx, 1)
+	elapsed := time.Since(start)
+
+	require.True(t, ok, "should return true when context is not canceled")
+	// 固定延迟 2s
+	require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond, "should wait approximately 2s")
+	require.Less(t, elapsed, 5*time.Second, "should not wait too long")
+}
+
+func TestSleepAntigravitySingleAccountBackoff_ContextCanceled(t *testing.T) {
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel() // 立即取消
+
+	start := time.Now()
+	ok := sleepAntigravitySingleAccountBackoff(ctx, 1)
+	elapsed := time.Since(start)
+
+	require.False(t, ok, "should return false when context is canceled")
+	require.Less(t, elapsed, 500*time.Millisecond, "should return immediately on cancel")
+}
+
+func TestSleepAntigravitySingleAccountBackoff_FixedDelay(t *testing.T) {
+	// 验证不同 retryCount 都使用固定 2s 延迟
+	ctx := context.Background()
+
+	start := time.Now()
+	ok := sleepAntigravitySingleAccountBackoff(ctx, 5)
+	elapsed := time.Since(start)
+
+	require.True(t, ok)
+	// 即使 retryCount=5，延迟仍然是固定的 2s
+	require.GreaterOrEqual(t, elapsed, 1500*time.Millisecond)
+	require.Less(t, elapsed, 5*time.Second)
+}
diff --git a/backend/internal/handler/gemini_v1beta_handler.go b/backend/internal/handler/gemini_v1beta_handler.go
index d5149f22..f8fb0dcb 100644
--- a/backend/internal/handler/gemini_v1beta_handler.go
+++ b/backend/internal/handler/gemini_v1beta_handler.go
@@ -327,6 +327,13 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 	var lastFailoverErr *service.UpstreamFailoverError
 	var forceCacheBilling bool // 粘性会话切换时的缓存计费标记
 
+	// 单账号分组提前设置 SingleAccountRetry 标记，让 Service 层首次 503 就不设模型限流标记。
+	// 避免单账号分组收到 503 (MODEL_CAPACITY_EXHAUSTED) 时设 29s 限流，导致后续请求连续快速失败。
+	if h.gatewayService.IsSingleAntigravityAccountGroup(c.Request.Context(), apiKey.GroupID) {
+		ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+		c.Request = c.Request.WithContext(ctx)
+	}
+
 	for {
 		selection, err := h.gatewayService.SelectAccountWithLoadAwareness(c.Request.Context(), apiKey.GroupID, sessionKey, modelName, failedAccountIDs, "") // Gemini 不使用会话限制
 		if err != nil {
@@ -334,6 +341,19 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
 				googleError(c, http.StatusServiceUnavailable, "No available Gemini accounts: "+err.Error())
 				return
 			}
+			// Antigravity 单账号退避重试：分组内没有其他可用账号时，
+			// 对 503 错误不直接返回，而是清除排除列表、等待退避后重试同一个账号。
+			// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的，等几秒就能恢复。
+			if lastFailoverErr != nil && lastFailoverErr.StatusCode == http.StatusServiceUnavailable && switchCount <= maxAccountSwitches {
+				if sleepAntigravitySingleAccountBackoff(c.Request.Context(), switchCount) {
+					log.Printf("Antigravity single-account 503 retry: clearing failed accounts, retry %d/%d", switchCount, maxAccountSwitches)
+					failedAccountIDs = make(map[int64]struct{})
+					// 设置 context 标记，让 Service 层预检查等待限流过期而非直接切换
+					ctx := context.WithValue(c.Request.Context(), ctxkey.SingleAccountRetry, true)
+					c.Request = c.Request.WithContext(ctx)
+					continue
+				}
+			}
 			h.handleGeminiFailoverExhausted(c, lastFailoverErr)
 			return
 		}
diff --git a/backend/internal/pkg/antigravity/request_transformer.go b/backend/internal/pkg/antigravity/request_transformer.go
index 65f45cfc..e89a4c53 100644
--- a/backend/internal/pkg/antigravity/request_transformer.go
+++ b/backend/internal/pkg/antigravity/request_transformer.go
@@ -271,6 +271,21 @@ func filterOpenCodePrompt(text string) string {
 	return ""
 }
 
+// systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表
+var systemBlockFilterPrefixes = []string{
+	"x-anthropic-billing-header",
+}
+
+// filterSystemBlockByPrefix 如果文本匹配过滤前缀，返回空字符串
+func filterSystemBlockByPrefix(text string) string {
+	for _, prefix := range systemBlockFilterPrefixes {
+		if strings.HasPrefix(text, prefix) {
+			return ""
+		}
+	}
+	return text
+}
+
 // buildSystemInstruction 构建 systemInstruction（与 Antigravity-Manager 保持一致）
 func buildSystemInstruction(system json.RawMessage, modelName string, opts TransformOptions, tools []ClaudeTool) *GeminiContent {
 	var parts []GeminiPart
@@ -287,8 +302,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans
 				if strings.Contains(sysStr, "You are Antigravity") {
 					userHasAntigravityIdentity = true
 				}
-				// 过滤 OpenCode 默认提示词
-				filtered := filterOpenCodePrompt(sysStr)
+				// 过滤 OpenCode 默认提示词和黑名单前缀
+				filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(sysStr))
 				if filtered != "" {
 					userSystemParts = append(userSystemParts, GeminiPart{Text: filtered})
 				}
@@ -302,8 +317,8 @@ func buildSystemInstruction(system json.RawMessage, modelName string, opts Trans
 						if strings.Contains(block.Text, "You are Antigravity") {
 							userHasAntigravityIdentity = true
 						}
-						// 过滤 OpenCode 默认提示词
-						filtered := filterOpenCodePrompt(block.Text)
+						// 过滤 OpenCode 默认提示词和黑名单前缀
+						filtered := filterSystemBlockByPrefix(filterOpenCodePrompt(block.Text))
 						if filtered != "" {
 							userSystemParts = append(userSystemParts, GeminiPart{Text: filtered})
 						}
diff --git a/backend/internal/pkg/ctxkey/ctxkey.go b/backend/internal/pkg/ctxkey/ctxkey.go
index 9bf563e7..0c4d82f7 100644
--- a/backend/internal/pkg/ctxkey/ctxkey.go
+++ b/backend/internal/pkg/ctxkey/ctxkey.go
@@ -28,4 +28,8 @@ const (
 	// IsMaxTokensOneHaikuRequest 标识当前请求是否为 max_tokens=1 + haiku 模型的探测请求
 	// 用于 ClaudeCodeOnly 验证绕过（绕过 system prompt 检查，但仍需验证 User-Agent）
 	IsMaxTokensOneHaikuRequest Key = "ctx_is_max_tokens_one_haiku"
+
+	// SingleAccountRetry 标识当前请求处于单账号 503 退避重试模式。
+	// 在此模式下，Service 层的模型限流预检查将等待限流过期而非直接切换账号。
+	SingleAccountRetry Key = "ctx_single_account_retry"
 )
diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go
index 7abe4f3a..457dd964 100644
--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -20,6 +20,7 @@ import (
 	"time"
 
 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
 	"github.com/tidwall/gjson"
@@ -47,6 +48,19 @@ const (
 	googleRPCTypeErrorInfo                = "type.googleapis.com/google.rpc.ErrorInfo"
 	googleRPCReasonModelCapacityExhausted = "MODEL_CAPACITY_EXHAUSTED"
 	googleRPCReasonRateLimitExceeded      = "RATE_LIMIT_EXCEEDED"
+
+	// 单账号 503 退避重试：Service 层原地重试的最大次数
+	// 在 handleSmartRetry 中，对于 shouldRateLimitModel（长延迟 ≥ 7s）的情况，
+	// 多账号模式下会设限流+切换账号；但单账号模式下改为原地等待+重试。
+	antigravitySingleAccountSmartRetryMaxAttempts = 3
+
+	// 单账号 503 退避重试：原地重试时单次最大等待时间
+	// 防止上游返回过长的 retryDelay 导致请求卡住太久
+	antigravitySingleAccountSmartRetryMaxWait = 15 * time.Second
+
+	// 单账号 503 退避重试：原地重试的总累计等待时间上限
+	// 超过此上限将不再重试，直接返回 503
+	antigravitySingleAccountSmartRetryTotalMaxWait = 30 * time.Second
 )
 
 // antigravityPassthroughErrorMessages 透传给客户端的错误消息白名单（小写）
@@ -149,6 +163,13 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 
 	// 情况1: retryDelay >= 阈值，限流模型并切换账号
 	if shouldRateLimitModel {
+		// 单账号 503 退避重试模式：不设限流、不切换账号，改为原地等待+重试
+		// 谷歌上游 503 (MODEL_CAPACITY_EXHAUSTED) 通常是暂时性的，等几秒就能恢复。
+		// 多账号场景下切换账号是最优选择，但单账号场景下设限流毫无意义（只会导致双重等待）。
+		if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) {
+			return s.handleSingleAccountRetryInPlace(p, resp, respBody, baseURL, waitDuration, modelName)
+		}
+
 		rateLimitDuration := waitDuration
 		if rateLimitDuration <= 0 {
 			rateLimitDuration = antigravityDefaultRateLimitDuration
@@ -237,7 +258,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 			}
 		}
 
-		// 所有重试都失败，限流当前模型并切换账号
+		// 所有重试都失败
 		rateLimitDuration := waitDuration
 		if rateLimitDuration <= 0 {
 			rateLimitDuration = antigravityDefaultRateLimitDuration
@@ -246,6 +267,22 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 		if retryBody == nil {
 			retryBody = respBody
 		}
+
+		// 单账号 503 退避重试模式：智能重试耗尽后不设限流、不切换账号，
+		// 直接返回 503 让 Handler 层的单账号退避循环做最终处理。
+		if resp.StatusCode == http.StatusServiceUnavailable && isSingleAccountRetry(p.ctx) {
+			log.Printf("%s status=%d smart_retry_exhausted_single_account attempts=%d model=%s account=%d body=%s (return 503 directly)",
+				p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200))
+			return &smartRetryResult{
+				action: smartRetryActionBreakWithResp,
+				resp: &http.Response{
+					StatusCode: resp.StatusCode,
+					Header:     resp.Header.Clone(),
+					Body:       io.NopCloser(bytes.NewReader(retryBody)),
+				},
+			}
+		}
+
 		log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)",
 			p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200))
 
@@ -280,17 +317,152 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 	return &smartRetryResult{action: smartRetryActionContinue}
 }
 
+// handleSingleAccountRetryInPlace 单账号 503 退避重试的原地重试逻辑。
+//
+// 在多账号场景下，收到 503 + 长 retryDelay（≥ 7s）时会设置模型限流 + 切换账号；
+// 但在单账号场景下，设限流毫无意义（因为切换回来的还是同一个账号，还要等限流过期）。
+// 此方法改为在 Service 层原地等待 + 重试，避免双重等待问题：
+//
+//	旧流程：Service 设限流 → Handler 退避等待 → Service 等限流过期 → 再请求（总耗时 = 退避 + 限流）
+//	新流程：Service 直接等 retryDelay → 重试 → 成功/再等 → 重试...（总耗时 ≈ 实际 retryDelay × 重试次数）
+//
+// 约束：
+//   - 单次等待不超过 antigravitySingleAccountSmartRetryMaxWait
+//   - 总累计等待不超过 antigravitySingleAccountSmartRetryTotalMaxWait
+//   - 最多重试 antigravitySingleAccountSmartRetryMaxAttempts 次
+func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace(
+	p antigravityRetryLoopParams,
+	resp *http.Response,
+	respBody []byte,
+	baseURL string,
+	waitDuration time.Duration,
+	modelName string,
+) *smartRetryResult {
+	// 限制单次等待时间
+	if waitDuration > antigravitySingleAccountSmartRetryMaxWait {
+		waitDuration = antigravitySingleAccountSmartRetryMaxWait
+	}
+	if waitDuration < antigravitySmartRetryMinWait {
+		waitDuration = antigravitySmartRetryMinWait
+	}
+
+	log.Printf("%s status=%d single_account_503_retry_in_place model=%s account=%d upstream_retry_delay=%v (retrying in-place instead of rate-limiting)",
+		p.prefix, resp.StatusCode, modelName, p.account.ID, waitDuration)
+
+	var lastRetryResp *http.Response
+	var lastRetryBody []byte
+	totalWaited := time.Duration(0)
+
+	for attempt := 1; attempt <= antigravitySingleAccountSmartRetryMaxAttempts; attempt++ {
+		// 检查累计等待是否超限
+		if totalWaited+waitDuration > antigravitySingleAccountSmartRetryTotalMaxWait {
+			remaining := antigravitySingleAccountSmartRetryTotalMaxWait - totalWaited
+			if remaining <= 0 {
+				log.Printf("%s single_account_503_retry: total_wait_exceeded total=%v max=%v, giving up",
+					p.prefix, totalWaited, antigravitySingleAccountSmartRetryTotalMaxWait)
+				break
+			}
+			waitDuration = remaining
+		}
+
+		log.Printf("%s status=%d single_account_503_retry attempt=%d/%d delay=%v total_waited=%v model=%s account=%d",
+			p.prefix, resp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, waitDuration, totalWaited, modelName, p.account.ID)
+
+		select {
+		case <-p.ctx.Done():
+			log.Printf("%s status=context_canceled_during_single_account_retry", p.prefix)
+			return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
+		case <-time.After(waitDuration):
+		}
+		totalWaited += waitDuration
+
+		// 创建新请求
+		retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body)
+		if err != nil {
+			log.Printf("%s single_account_503_retry: request_build_failed error=%v", p.prefix, err)
+			break
+		}
+
+		retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
+		if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
+			log.Printf("%s status=%d single_account_503_retry_success attempt=%d/%d total_waited=%v",
+				p.prefix, retryResp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited)
+			// 关闭之前的响应
+			if lastRetryResp != nil {
+				_ = lastRetryResp.Body.Close()
+			}
+			return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
+		}
+
+		// 网络错误时继续重试
+		if retryErr != nil || retryResp == nil {
+			log.Printf("%s single_account_503_retry: network_error attempt=%d/%d error=%v",
+				p.prefix, attempt, antigravitySingleAccountSmartRetryMaxAttempts, retryErr)
+			continue
+		}
+
+		// 关闭之前的响应
+		if lastRetryResp != nil {
+			_ = lastRetryResp.Body.Close()
+		}
+		lastRetryResp = retryResp
+		lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
+		_ = retryResp.Body.Close()
+
+		// 解析新的重试信息，更新下次等待时间
+		if attempt < antigravitySingleAccountSmartRetryMaxAttempts && lastRetryBody != nil {
+			_, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody)
+			if newWaitDuration > 0 {
+				waitDuration = newWaitDuration
+				if waitDuration > antigravitySingleAccountSmartRetryMaxWait {
+					waitDuration = antigravitySingleAccountSmartRetryMaxWait
+				}
+				if waitDuration < antigravitySmartRetryMinWait {
+					waitDuration = antigravitySmartRetryMinWait
+				}
+			}
+		}
+	}
+
+	// 所有重试都失败，不设限流，直接返回 503
+	// Handler 层的单账号退避循环会做最终处理
+	retryBody := lastRetryBody
+	if retryBody == nil {
+		retryBody = respBody
+	}
+	log.Printf("%s status=%d single_account_503_retry_exhausted attempts=%d total_waited=%v model=%s account=%d body=%s (return 503 directly)",
+		p.prefix, resp.StatusCode, antigravitySingleAccountSmartRetryMaxAttempts, totalWaited, modelName, p.account.ID, truncateForLog(retryBody, 200))
+
+	return &smartRetryResult{
+		action: smartRetryActionBreakWithResp,
+		resp: &http.Response{
+			StatusCode: resp.StatusCode,
+			Header:     resp.Header.Clone(),
+			Body:       io.NopCloser(bytes.NewReader(retryBody)),
+		},
+	}
+}
+
 // antigravityRetryLoop 执行带 URL fallback 的重试循环
 func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) {
 	// 预检查：如果账号已限流，直接返回切换信号
 	if p.requestedModel != "" {
 		if remaining := p.account.GetRateLimitRemainingTimeWithContext(p.ctx, p.requestedModel); remaining > 0 {
-			log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d",
-				p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
-			return nil, &AntigravityAccountSwitchError{
-				OriginalAccountID: p.account.ID,
-				RateLimitedModel:  p.requestedModel,
-				IsStickySession:   p.isStickySession,
+			// 单账号 503 退避重试模式：跳过限流预检查，直接发请求。
+			// 首次请求设的限流是为了多账号调度器跳过该账号，在单账号模式下无意义。
+			// 如果上游确实还不可用，handleSmartRetry → handleSingleAccountRetryInPlace
+			// 会在 Service 层原地等待+重试，不需要在预检查这里等。
+			if isSingleAccountRetry(p.ctx) {
+				log.Printf("%s pre_check: single_account_retry skipping rate_limit remaining=%v model=%s account=%d (will retry in-place if 503)",
+					p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
+			} else {
+				log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d",
+					p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
+				return nil, &AntigravityAccountSwitchError{
+					OriginalAccountID: p.account.ID,
+					RateLimitedModel:  p.requestedModel,
+					IsStickySession:   p.isStickySession,
+				}
 			}
 		}
 	}
@@ -372,12 +544,12 @@ urlFallbackLoop:
 				_ = resp.Body.Close()
 
 				// ★ 统一入口：自定义错误码 + 临时不可调度
-				if handled, policyErr := s.applyErrorPolicy(p, resp.StatusCode, resp.Header, respBody); handled {
+				if handled, outStatus, policyErr := s.applyErrorPolicy(p, resp.StatusCode, resp.Header, respBody); handled {
 					if policyErr != nil {
 						return nil, policyErr
 					}
 					resp = &http.Response{
-						StatusCode: resp.StatusCode,
+						StatusCode: outStatus,
 						Header:     resp.Header.Clone(),
 						Body:       io.NopCloser(bytes.NewReader(respBody)),
 					}
@@ -611,21 +783,22 @@ func (s *AntigravityGatewayService) checkErrorPolicy(ctx context.Context, accoun
 	return s.rateLimitService.CheckErrorPolicy(ctx, account, statusCode, body)
 }
 
-// applyErrorPolicy 应用错误策略结果，返回是否应终止当前循环
-func (s *AntigravityGatewayService) applyErrorPolicy(p antigravityRetryLoopParams, statusCode int, headers http.Header, respBody []byte) (handled bool, retErr error) {
+// applyErrorPolicy 应用错误策略结果，返回是否应终止当前循环及应返回的状态码。
+// ErrorPolicySkipped 时 outStatus 为 500（前端约定：未命中的错误返回 500）。
+func (s *AntigravityGatewayService) applyErrorPolicy(p antigravityRetryLoopParams, statusCode int, headers http.Header, respBody []byte) (handled bool, outStatus int, retErr error) {
 	switch s.checkErrorPolicy(p.ctx, p.account, statusCode, respBody) {
 	case ErrorPolicySkipped:
-		return true, nil
+		return true, http.StatusInternalServerError, nil
 	case ErrorPolicyMatched:
 		_ = p.handleError(p.ctx, p.prefix, p.account, statusCode, headers, respBody,
 			p.requestedModel, p.groupID, p.sessionHash, p.isStickySession)
-		return true, nil
+		return true, statusCode, nil
 	case ErrorPolicyTempUnscheduled:
 		slog.Info("temp_unschedulable_matched",
 			"prefix", p.prefix, "status_code", statusCode, "account_id", p.account.ID)
-		return true, &AntigravityAccountSwitchError{OriginalAccountID: p.account.ID, IsStickySession: p.isStickySession}
+		return true, statusCode, &AntigravityAccountSwitchError{OriginalAccountID: p.account.ID, IsStickySession: p.isStickySession}
 	}
-	return false, nil
+	return false, statusCode, nil
 }
 
 // mapAntigravityModel 获取映射后的模型名
@@ -1940,6 +2113,12 @@ func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool {
 	}
 }
 
+// isSingleAccountRetry 检查 context 中是否设置了单账号退避重试标记
+func isSingleAccountRetry(ctx context.Context) bool {
+	v, _ := ctx.Value(ctxkey.SingleAccountRetry).(bool)
+	return v
+}
+
 // setModelRateLimitByModelName 使用官方模型 ID 设置模型级限流
 // 直接使用上游返回的模型 ID（如 claude-sonnet-4-5）作为限流 key
 // 返回是否已成功设置（若模型名为空或 repo 为 nil 将返回 false）
@@ -2239,6 +2418,10 @@ func (s *AntigravityGatewayService) handleUpstreamError(
 	requestedModel string,
 	groupID int64, sessionHash string, isStickySession bool,
 ) *handleModelRateLimitResult {
+	// 遵守自定义错误码策略：未命中则跳过所有限流处理
+	if !account.ShouldHandleErrorCode(statusCode) {
+		return nil
+	}
 	// 模型级限流处理（优先）
 	result := s.handleModelRateLimit(&handleModelRateLimitParams{
 		ctx:             ctx,
diff --git a/backend/internal/service/antigravity_single_account_retry_test.go b/backend/internal/service/antigravity_single_account_retry_test.go
new file mode 100644
index 00000000..d5813553
--- /dev/null
+++ b/backend/internal/service/antigravity_single_account_retry_test.go
@@ -0,0 +1,902 @@
+//go:build unit
+
+package service
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"net/http"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
+	"github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// 辅助函数：构造带 SingleAccountRetry 标记的 context
+// ---------------------------------------------------------------------------
+
+func ctxWithSingleAccountRetry() context.Context {
+	return context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true)
+}
+
+// ---------------------------------------------------------------------------
+// 1. isSingleAccountRetry 测试
+// ---------------------------------------------------------------------------
+
+func TestIsSingleAccountRetry_True(t *testing.T) {
+	ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, true)
+	require.True(t, isSingleAccountRetry(ctx))
+}
+
+func TestIsSingleAccountRetry_False_NoValue(t *testing.T) {
+	require.False(t, isSingleAccountRetry(context.Background()))
+}
+
+func TestIsSingleAccountRetry_False_ExplicitFalse(t *testing.T) {
+	ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, false)
+	require.False(t, isSingleAccountRetry(ctx))
+}
+
+func TestIsSingleAccountRetry_False_WrongType(t *testing.T) {
+	ctx := context.WithValue(context.Background(), ctxkey.SingleAccountRetry, "true")
+	require.False(t, isSingleAccountRetry(ctx))
+}
+
+// ---------------------------------------------------------------------------
+// 2. 常量验证
+// ---------------------------------------------------------------------------
+
+func TestSingleAccountRetryConstants(t *testing.T) {
+	require.Equal(t, 3, antigravitySingleAccountSmartRetryMaxAttempts,
+		"单账号原地重试最多 3 次")
+	require.Equal(t, 15*time.Second, antigravitySingleAccountSmartRetryMaxWait,
+		"单次最大等待 15s")
+	require.Equal(t, 30*time.Second, antigravitySingleAccountSmartRetryTotalMaxWait,
+		"总累计等待不超过 30s")
+}
+
+// ---------------------------------------------------------------------------
+// 3. handleSmartRetry + 503 + SingleAccountRetry → 走 handleSingleAccountRetryInPlace
+//    （而非设模型限流 + 切换账号）
+// ---------------------------------------------------------------------------
+
+// TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace
+// 核心场景：503 + retryDelay >= 7s + SingleAccountRetry 标记
+// → 不设模型限流、不切换账号，改为原地重试
+func TestHandleSmartRetry_503_LongDelay_SingleAccountRetry_RetryInPlace(t *testing.T) {
+	// 原地重试成功
+	successResp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+	}
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{successResp},
+		errors:    []error{nil},
+	}
+
+	repo := &stubAntigravityAccountRepo{}
+	account := &Account{
+		ID:          1,
+		Name:        "acc-single",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Concurrency: 1,
+	}
+
+	// 503 + 39s >= 7s 阈值 + MODEL_CAPACITY_EXHAUSTED
+	respBody := []byte(`{
+		"error": {
+			"code": 503,
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
+			],
+			"message": "No capacity available for model gemini-3-pro-high on the server"
+		}
+	}`)
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+		Body:       io.NopCloser(bytes.NewReader(respBody)),
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:          ctxWithSingleAccountRetry(), // 关键：设置单账号标记
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+		accountRepo:  repo,
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	}
+
+	availableURLs := []string{"https://ag-1.test"}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	// 关键断言：返回 resp（原地重试成功），而非 switchError（切换账号）
+	require.NotNil(t, result.resp, "should return successful response from in-place retry")
+	require.Equal(t, http.StatusOK, result.resp.StatusCode)
+	require.Nil(t, result.switchError, "should NOT return switchError in single account mode")
+	require.Nil(t, result.err)
+
+	// 验证未设模型限流（单账号模式不应设限流）
+	require.Len(t, repo.modelRateLimitCalls, 0,
+		"should NOT set model rate limit in single account retry mode")
+
+	// 验证确实调用了 upstream（原地重试）
+	require.GreaterOrEqual(t, len(upstream.calls), 1, "should have made at least one retry call")
+}
+
+// TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches
+// 对照组：503 + retryDelay >= 7s + 无 SingleAccountRetry 标记
+// → 照常设模型限流 + 切换账号
+func TestHandleSmartRetry_503_LongDelay_NoSingleAccountRetry_StillSwitches(t *testing.T) {
+	repo := &stubAntigravityAccountRepo{}
+	account := &Account{
+		ID:       2,
+		Name:     "acc-multi",
+		Type:     AccountTypeOAuth,
+		Platform: PlatformAntigravity,
+	}
+
+	// 503 + 39s >= 7s 阈值
+	respBody := []byte(`{
+		"error": {
+			"code": 503,
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
+			]
+		}
+	}`)
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+		Body:       io.NopCloser(bytes.NewReader(respBody)),
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:         context.Background(), // 关键：无单账号标记
+		prefix:      "[test]",
+		account:     account,
+		accessToken: "token",
+		action:      "generateContent",
+		body:        []byte(`{"input":"test"}`),
+		accountRepo: repo,
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	}
+
+	availableURLs := []string{"https://ag-1.test"}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	// 对照：多账号模式返回 switchError
+	require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503")
+	require.Nil(t, result.resp, "should not return resp when switchError is set")
+
+	// 对照：多账号模式应设模型限流
+	require.Len(t, repo.modelRateLimitCalls, 1,
+		"multi-account mode SHOULD set model rate limit")
+}
+
+// TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches
+// 边界情况：429（非 503）+ SingleAccountRetry 标记
+// → 单账号原地重试仅针对 503，429 依然走切换账号逻辑
+func TestHandleSmartRetry_429_LongDelay_SingleAccountRetry_StillSwitches(t *testing.T) {
+	repo := &stubAntigravityAccountRepo{}
+	account := &Account{
+		ID:       3,
+		Name:     "acc-429",
+		Type:     AccountTypeOAuth,
+		Platform: PlatformAntigravity,
+	}
+
+	// 429 + 15s >= 7s 阈值
+	respBody := []byte(`{
+		"error": {
+			"status": "RESOURCE_EXHAUSTED",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "15s"}
+			]
+		}
+	}`)
+	resp := &http.Response{
+		StatusCode: http.StatusTooManyRequests, // 429，不是 503
+		Header:     http.Header{},
+		Body:       io.NopCloser(bytes.NewReader(respBody)),
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:         ctxWithSingleAccountRetry(), // 有单账号标记
+		prefix:      "[test]",
+		account:     account,
+		accessToken: "token",
+		action:      "generateContent",
+		body:        []byte(`{"input":"test"}`),
+		accountRepo: repo,
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	}
+
+	availableURLs := []string{"https://ag-1.test"}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	// 429 即使有单账号标记，也应走切换账号
+	require.NotNil(t, result.switchError, "429 should still return switchError even with SingleAccountRetry")
+	require.Len(t, repo.modelRateLimitCalls, 1,
+		"429 should still set model rate limit even with SingleAccountRetry")
+}
+
+// ---------------------------------------------------------------------------
+// 4. handleSmartRetry + 503 + 短延迟 + SingleAccountRetry → 智能重试耗尽后不设限流
+// ---------------------------------------------------------------------------
+
+// TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit
+// 503 + retryDelay < 7s + SingleAccountRetry → 智能重试耗尽后直接返回 503，不设限流
+func TestHandleSmartRetry_503_ShortDelay_SingleAccountRetry_NoRateLimit(t *testing.T) {
+	// 智能重试也返回 503
+	failRespBody := `{
+		"error": {
+			"code": 503,
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+			]
+		}
+	}`
+	failResp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(failRespBody)),
+	}
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{failResp},
+		errors:    []error{nil},
+	}
+
+	repo := &stubAntigravityAccountRepo{}
+	account := &Account{
+		ID:       4,
+		Name:     "acc-short-503",
+		Type:     AccountTypeOAuth,
+		Platform: PlatformAntigravity,
+	}
+
+	// 0.1s < 7s 阈值
+	respBody := []byte(`{
+		"error": {
+			"code": 503,
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+			]
+		}
+	}`)
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+		Body:       io.NopCloser(bytes.NewReader(respBody)),
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:          ctxWithSingleAccountRetry(),
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+		accountRepo:  repo,
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	}
+
+	availableURLs := []string{"https://ag-1.test"}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	// 关键断言：单账号 503 模式下，智能重试耗尽后直接返回 503 响应，不切换
+	require.NotNil(t, result.resp, "should return 503 response directly for single account mode")
+	require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode)
+	require.Nil(t, result.switchError, "should NOT switch account in single account mode")
+
+	// 关键断言：不设模型限流
+	require.Len(t, repo.modelRateLimitCalls, 0,
+		"should NOT set model rate limit for 503 in single account mode")
+}
+
+// TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit
+// 对照组：503 + retryDelay < 7s + 无 SingleAccountRetry → 智能重试耗尽后照常设限流
+func TestHandleSmartRetry_503_ShortDelay_NoSingleAccountRetry_SetsRateLimit(t *testing.T) {
+	failRespBody := `{
+		"error": {
+			"code": 503,
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+			]
+		}
+	}`
+	failResp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(failRespBody)),
+	}
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{failResp},
+		errors:    []error{nil},
+	}
+
+	repo := &stubAntigravityAccountRepo{}
+	account := &Account{
+		ID:       5,
+		Name:     "acc-multi-503",
+		Type:     AccountTypeOAuth,
+		Platform: PlatformAntigravity,
+	}
+
+	respBody := []byte(`{
+		"error": {
+			"code": 503,
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+			]
+		}
+	}`)
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+		Body:       io.NopCloser(bytes.NewReader(respBody)),
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:          context.Background(), // 无单账号标记
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+		accountRepo:  repo,
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	}
+
+	availableURLs := []string{"https://ag-1.test"}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, availableURLs)
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	// 对照：多账号模式应返回 switchError
+	require.NotNil(t, result.switchError, "multi-account mode should return switchError for 503")
+	// 对照：多账号模式应设模型限流
+	require.Len(t, repo.modelRateLimitCalls, 1,
+		"multi-account mode should set model rate limit")
+}
+
+// ---------------------------------------------------------------------------
+// 5. handleSingleAccountRetryInPlace 直接测试
+// ---------------------------------------------------------------------------
+
+// TestHandleSingleAccountRetryInPlace_Success 原地重试成功
+func TestHandleSingleAccountRetryInPlace_Success(t *testing.T) {
+	successResp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+	}
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{successResp},
+		errors:    []error{nil},
+	}
+
+	account := &Account{
+		ID:          10,
+		Name:        "acc-inplace-ok",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Concurrency: 1,
+	}
+
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:          ctxWithSingleAccountRetry(),
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+	}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	require.NotNil(t, result.resp, "should return successful response")
+	require.Equal(t, http.StatusOK, result.resp.StatusCode)
+	require.Nil(t, result.switchError, "should not switch account on success")
+	require.Nil(t, result.err)
+}
+
+// TestHandleSingleAccountRetryInPlace_AllRetriesFail 所有重试都失败，返回 503（不设限流）
+func TestHandleSingleAccountRetryInPlace_AllRetriesFail(t *testing.T) {
+	// 构造 3 个 503 响应（对应 3 次原地重试）
+	var responses []*http.Response
+	var errors []error
+	for i := 0; i < antigravitySingleAccountSmartRetryMaxAttempts; i++ {
+		responses = append(responses, &http.Response{
+			StatusCode: http.StatusServiceUnavailable,
+			Header:     http.Header{},
+			Body: io.NopCloser(strings.NewReader(`{
+				"error": {
+					"code": 503,
+					"status": "UNAVAILABLE",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+					]
+				}
+			}`)),
+		})
+		errors = append(errors, nil)
+	}
+	upstream := &mockSmartRetryUpstream{
+		responses: responses,
+		errors:    errors,
+	}
+
+	account := &Account{
+		ID:          11,
+		Name:        "acc-inplace-fail",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Concurrency: 1,
+	}
+
+	origBody := []byte(`{"error":{"code":503,"status":"UNAVAILABLE"}}`)
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{"X-Test": {"original"}},
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:          ctxWithSingleAccountRetry(),
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+	}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSingleAccountRetryInPlace(params, resp, origBody, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	// 关键：返回 503 resp，不返回 switchError
+	require.NotNil(t, result.resp, "should return 503 response directly")
+	require.Equal(t, http.StatusServiceUnavailable, result.resp.StatusCode)
+	require.Nil(t, result.switchError, "should NOT return switchError - let Handler handle it")
+	require.Nil(t, result.err)
+
+	// 验证确实重试了指定次数
+	require.Len(t, upstream.calls, antigravitySingleAccountSmartRetryMaxAttempts,
+		"should have made exactly maxAttempts retry calls")
+}
+
+// TestHandleSingleAccountRetryInPlace_WaitDurationClamped 等待时间被限制在 [min, max] 范围
+func TestHandleSingleAccountRetryInPlace_WaitDurationClamped(t *testing.T) {
+	// 用短延迟的成功响应，只验证不 panic
+	successResp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+	}
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{successResp},
+		errors:    []error{nil},
+	}
+
+	account := &Account{
+		ID:          12,
+		Name:        "acc-clamp",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Concurrency: 1,
+	}
+
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:          ctxWithSingleAccountRetry(),
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+	}
+
+	svc := &AntigravityGatewayService{}
+
+	// 等待时间过大应被 clamp 到 antigravitySingleAccountSmartRetryMaxWait
+	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 999*time.Second, "gemini-3-pro")
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	require.NotNil(t, result.resp)
+	require.Equal(t, http.StatusOK, result.resp.StatusCode)
+}
+
+// TestHandleSingleAccountRetryInPlace_ContextCanceled context 取消时立即返回
+func TestHandleSingleAccountRetryInPlace_ContextCanceled(t *testing.T) {
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{nil},
+		errors:    []error{nil},
+	}
+
+	account := &Account{
+		ID:          13,
+		Name:        "acc-cancel",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Concurrency: 1,
+	}
+
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	ctx = context.WithValue(ctx, ctxkey.SingleAccountRetry, true)
+	cancel() // 立即取消
+
+	params := antigravityRetryLoopParams{
+		ctx:          ctx,
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+	}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	require.Error(t, result.err, "should return context error")
+	// 不应调用 upstream（因为在等待阶段就被取消了）
+	require.Len(t, upstream.calls, 0, "should not call upstream when context is canceled")
+}
+
+// TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry 网络错误时继续重试
+func TestHandleSingleAccountRetryInPlace_NetworkError_ContinuesRetry(t *testing.T) {
+	successResp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+	}
+	upstream := &mockSmartRetryUpstream{
+		// 第1次网络错误（nil resp），第2次成功
+		responses: []*http.Response{nil, successResp},
+		errors:    []error{nil, nil},
+	}
+
+	account := &Account{
+		ID:          14,
+		Name:        "acc-net-retry",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Concurrency: 1,
+	}
+
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:          ctxWithSingleAccountRetry(),
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+	}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	require.NotNil(t, result.resp, "should return successful response after network error recovery")
+	require.Equal(t, http.StatusOK, result.resp.StatusCode)
+	require.Len(t, upstream.calls, 2, "first call fails (network error), second succeeds")
+}
+
+// ---------------------------------------------------------------------------
+// 6. antigravityRetryLoop 预检查：单账号模式跳过限流
+// ---------------------------------------------------------------------------
+
+// TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit
+// 预检查中，如果有 SingleAccountRetry 标记，即使账号已限流也跳过直接发请求
+func TestAntigravityRetryLoop_PreCheck_SingleAccountRetry_SkipsRateLimit(t *testing.T) {
+	// 创建一个已设模型限流的账号
+	upstream := &recordingOKUpstream{}
+	account := &Account{
+		ID:          20,
+		Name:        "acc-rate-limited",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Schedulable: true,
+		Status:      StatusActive,
+		Concurrency: 1,
+		Extra: map[string]any{
+			modelRateLimitsKey: map[string]any{
+				"claude-sonnet-4-5": map[string]any{
+					"rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339),
+				},
+			},
+		},
+	}
+
+	svc := &AntigravityGatewayService{}
+	result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+		ctx:            ctxWithSingleAccountRetry(),
+		prefix:         "[test]",
+		account:        account,
+		accessToken:    "token",
+		action:         "generateContent",
+		body:           []byte(`{"input":"test"}`),
+		httpUpstream:   upstream,
+		requestedModel: "claude-sonnet-4-5",
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	})
+
+	require.NoError(t, err, "should not return error")
+	require.NotNil(t, result, "should return result")
+	require.NotNil(t, result.resp, "should have response")
+	require.Equal(t, http.StatusOK, result.resp.StatusCode)
+	// 关键：尽管限流了，有 SingleAccountRetry 标记时仍然到达了 upstream
+	require.Equal(t, 1, upstream.calls, "should have reached upstream despite rate limit")
+}
+
+// TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit
+// 对照组：无 SingleAccountRetry + 已限流 → 预检查返回 switchError
+func TestAntigravityRetryLoop_PreCheck_NoSingleAccountRetry_SwitchesOnRateLimit(t *testing.T) {
+	upstream := &recordingOKUpstream{}
+	account := &Account{
+		ID:          21,
+		Name:        "acc-rate-limited-multi",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Schedulable: true,
+		Status:      StatusActive,
+		Concurrency: 1,
+		Extra: map[string]any{
+			modelRateLimitsKey: map[string]any{
+				"claude-sonnet-4-5": map[string]any{
+					"rate_limit_reset_at": time.Now().Add(30 * time.Second).Format(time.RFC3339),
+				},
+			},
+		},
+	}
+
+	svc := &AntigravityGatewayService{}
+	result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+		ctx:            context.Background(), // 无单账号标记
+		prefix:         "[test]",
+		account:        account,
+		accessToken:    "token",
+		action:         "generateContent",
+		body:           []byte(`{"input":"test"}`),
+		httpUpstream:   upstream,
+		requestedModel: "claude-sonnet-4-5",
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	})
+
+	require.Nil(t, result, "should not return result on rate limit switch")
+	require.NotNil(t, err, "should return error")
+
+	var switchErr *AntigravityAccountSwitchError
+	require.ErrorAs(t, err, &switchErr, "should return AntigravityAccountSwitchError")
+	require.Equal(t, account.ID, switchErr.OriginalAccountID)
+	require.Equal(t, "claude-sonnet-4-5", switchErr.RateLimitedModel)
+
+	// upstream 不应被调用（预检查就短路了）
+	require.Equal(t, 0, upstream.calls, "upstream should NOT be called when pre-check blocks")
+}
+
+// ---------------------------------------------------------------------------
+// 7. 端到端集成场景测试
+// ---------------------------------------------------------------------------
+
+// TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E
+// 端到端场景：503 + 单账号 + 原地重试第2次成功
+func TestHandleSmartRetry_503_SingleAccount_RetryInPlace_ThenSuccess_E2E(t *testing.T) {
+	// 第1次原地重试仍返回 503，第2次成功
+	fail503Body := `{
+		"error": {
+			"code": 503,
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.1s"}
+			]
+		}
+	}`
+	resp503 := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(fail503Body)),
+	}
+	successResp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+	}
+
+	upstream := &mockSmartRetryUpstream{
+		responses: []*http.Response{resp503, successResp},
+		errors:    []error{nil, nil},
+	}
+
+	account := &Account{
+		ID:          30,
+		Name:        "acc-e2e",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Concurrency: 1,
+	}
+
+	resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+	}
+
+	params := antigravityRetryLoopParams{
+		ctx:          ctxWithSingleAccountRetry(),
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+	}
+
+	svc := &AntigravityGatewayService{}
+	result := svc.handleSingleAccountRetryInPlace(params, resp, nil, "https://ag-1.test", 1*time.Second, "gemini-3-pro")
+
+	require.NotNil(t, result)
+	require.Equal(t, smartRetryActionBreakWithResp, result.action)
+	require.NotNil(t, result.resp, "should return successful response after 2nd attempt")
+	require.Equal(t, http.StatusOK, result.resp.StatusCode)
+	require.Nil(t, result.switchError)
+	require.Len(t, upstream.calls, 2, "first 503, second OK")
+}
+
+// TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E
+// 通过 antigravityRetryLoop → handleSmartRetry → handleSingleAccountRetryInPlace 完整链路
+func TestAntigravityRetryLoop_503_SingleAccount_InPlaceRetryUsed_E2E(t *testing.T) {
+	// 初始请求返回 503 + 长延迟
+	initial503Body := []byte(`{
+		"error": {
+			"code": 503,
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "10s"}
+			],
+			"message": "No capacity available"
+		}
+	}`)
+	initial503Resp := &http.Response{
+		StatusCode: http.StatusServiceUnavailable,
+		Header:     http.Header{},
+		Body:       io.NopCloser(bytes.NewReader(initial503Body)),
+	}
+
+	// 原地重试成功
+	successResp := &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(`{"result":"ok"}`)),
+	}
+
+	upstream := &mockSmartRetryUpstream{
+		// 第1次调用（retryLoop 主循环）返回 503
+		// 第2次调用（handleSingleAccountRetryInPlace 原地重试）返回 200
+		responses: []*http.Response{initial503Resp, successResp},
+		errors:    []error{nil, nil},
+	}
+
+	repo := &stubAntigravityAccountRepo{}
+	account := &Account{
+		ID:          31,
+		Name:        "acc-e2e-loop",
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Schedulable: true,
+		Status:      StatusActive,
+		Concurrency: 1,
+	}
+
+	svc := &AntigravityGatewayService{}
+	result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
+		ctx:          ctxWithSingleAccountRetry(),
+		prefix:       "[test]",
+		account:      account,
+		accessToken:  "token",
+		action:       "generateContent",
+		body:         []byte(`{"input":"test"}`),
+		httpUpstream: upstream,
+		accountRepo:  repo,
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	})
+
+	require.NoError(t, err, "should not return error on successful retry")
+	require.NotNil(t, result, "should return result")
+	require.NotNil(t, result.resp, "should return response")
+	require.Equal(t, http.StatusOK, result.resp.StatusCode)
+
+	// 验证未设模型限流
+	require.Len(t, repo.modelRateLimitCalls, 0,
+		"should NOT set model rate limit in single account retry mode")
+}
diff --git a/backend/internal/service/error_policy_integration_test.go b/backend/internal/service/error_policy_integration_test.go
index 9f8ad938..a8b42a2c 100644
--- a/backend/internal/service/error_policy_integration_test.go
+++ b/backend/internal/service/error_policy_integration_test.go
@@ -116,7 +116,7 @@ func TestRetryLoop_ErrorPolicy_CustomErrorCodes(t *testing.T) {
 			customCodes:       []any{float64(500)},
 			expectHandleError: 0,
 			expectUpstream:    1,
-			expectStatusCode:  429,
+			expectStatusCode:  500,
 		},
 		{
 			name:              "500_in_custom_codes_matched",
@@ -364,3 +364,109 @@ func TestRetryLoop_ErrorPolicy_NoPolicy_OriginalBehavior(t *testing.T) {
 	require.Equal(t, antigravityMaxRetries, upstream.calls, "should exhaust all retries")
 	require.Equal(t, 1, handleErrorCount, "handleError should be called once after retries exhausted")
 }
+
+// ---------------------------------------------------------------------------
+// epTrackingRepo — records SetRateLimited / SetError calls for verification.
+// ---------------------------------------------------------------------------
+
+type epTrackingRepo struct {
+	mockAccountRepoForGemini
+	rateLimitedCalls int
+	rateLimitedID    int64
+	setErrCalls      int
+	setErrID         int64
+	tempCalls        int
+}
+
+func (r *epTrackingRepo) SetRateLimited(_ context.Context, id int64, _ time.Time) error {
+	r.rateLimitedCalls++
+	r.rateLimitedID = id
+	return nil
+}
+
+func (r *epTrackingRepo) SetError(_ context.Context, id int64, _ string) error {
+	r.setErrCalls++
+	r.setErrID = id
+	return nil
+}
+
+func (r *epTrackingRepo) SetTempUnschedulable(_ context.Context, _ int64, _ time.Time, _ string) error {
+	r.tempCalls++
+	return nil
+}
+
+// ---------------------------------------------------------------------------
+// TestCustomErrorCode599_SkippedErrors_Return500_NoRateLimit
+//
+// 核心场景：自定义错误码设为 [599]（一个不会真正出现的错误码），
+// 当上游返回 429/500/503/401 时：
+//   - 返回给客户端的状态码必须是 500（而不是透传原始状态码）
+//   - 不调用 SetRateLimited（不进入限流状态）
+//   - 不调用 SetError（不停止调度）
+//   - 不调用 handleError
+// ---------------------------------------------------------------------------
+
+func TestCustomErrorCode599_SkippedErrors_Return500_NoRateLimit(t *testing.T) {
+	errorCodes := []int{429, 500, 503, 401, 403}
+
+	for _, upstreamStatus := range errorCodes {
+		t.Run(http.StatusText(upstreamStatus), func(t *testing.T) {
+			saveAndSetBaseURLs(t)
+
+			upstream := &epFixedUpstream{
+				statusCode: upstreamStatus,
+				body:       `{"error":"some upstream error"}`,
+			}
+			repo := &epTrackingRepo{}
+			rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+			svc := &AntigravityGatewayService{rateLimitService: rlSvc}
+
+			account := &Account{
+				ID:          500,
+				Type:        AccountTypeAPIKey,
+				Platform:    PlatformAntigravity,
+				Schedulable: true,
+				Status:      StatusActive,
+				Concurrency: 1,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(599)},
+				},
+			}
+
+			var handleErrorCount int
+			p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
+				handleErrorCount++
+				return nil
+			})
+
+			result, err := svc.antigravityRetryLoop(p)
+
+			// 不应返回 error（Skipped 不触发账号切换）
+			require.NoError(t, err, "should not return error")
+			require.NotNil(t, result, "result should not be nil")
+			require.NotNil(t, result.resp, "response should not be nil")
+			defer func() { _ = result.resp.Body.Close() }()
+
+			// 状态码必须是 500（不透传原始状态码）
+			require.Equal(t, http.StatusInternalServerError, result.resp.StatusCode,
+				"skipped error should return 500, not %d", upstreamStatus)
+
+			// 不调用 handleError
+			require.Equal(t, 0, handleErrorCount,
+				"handleError should NOT be called for skipped errors")
+
+			// 不标记限流
+			require.Equal(t, 0, repo.rateLimitedCalls,
+				"SetRateLimited should NOT be called for skipped errors")
+
+			// 不停止调度
+			require.Equal(t, 0, repo.setErrCalls,
+				"SetError should NOT be called for skipped errors")
+
+			// 只调用一次上游（不重试）
+			require.Equal(t, 1, upstream.calls,
+				"should call upstream exactly once (no retry)")
+		})
+	}
+}
diff --git a/backend/internal/service/error_policy_test.go b/backend/internal/service/error_policy_test.go
index a8b69c22..9d7d025e 100644
--- a/backend/internal/service/error_policy_test.go
+++ b/backend/internal/service/error_policy_test.go
@@ -158,6 +158,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 		statusCode        int
 		body              []byte
 		expectedHandled   bool
+		expectedStatus    int  // expected outStatus
 		expectedSwitchErr bool // expect *AntigravityAccountSwitchError
 		handleErrorCalls  int
 	}{
@@ -171,6 +172,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 			statusCode:       500,
 			body:             []byte(`"error"`),
 			expectedHandled:  false,
+			expectedStatus:   500, // passthrough
 			handleErrorCalls: 0,
 		},
 		{
@@ -187,6 +189,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 			statusCode:       500, // not in custom codes
 			body:             []byte(`"error"`),
 			expectedHandled:  true,
+			expectedStatus:   http.StatusInternalServerError, // skipped → 500
 			handleErrorCalls: 0,
 		},
 		{
@@ -203,6 +206,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 			statusCode:       500,
 			body:             []byte(`"error"`),
 			expectedHandled:  true,
+			expectedStatus:   500, // matched → original status
 			handleErrorCalls: 1,
 		},
 		{
@@ -225,6 +229,7 @@ func TestApplyErrorPolicy(t *testing.T) {
 			statusCode:        503,
 			body:              []byte(`overloaded`),
 			expectedHandled:   true,
+			expectedStatus:    503, // temp_unscheduled → original status
 			expectedSwitchErr: true,
 			handleErrorCalls:  0,
 		},
@@ -250,9 +255,10 @@ func TestApplyErrorPolicy(t *testing.T) {
 				isStickySession: true,
 			}
 
-			handled, retErr := svc.applyErrorPolicy(p, tt.statusCode, http.Header{}, tt.body)
+			handled, outStatus, retErr := svc.applyErrorPolicy(p, tt.statusCode, http.Header{}, tt.body)
 
 			require.Equal(t, tt.expectedHandled, handled, "handled mismatch")
+			require.Equal(t, tt.expectedStatus, outStatus, "outStatus mismatch")
 			require.Equal(t, tt.handleErrorCalls, handleErrorCount, "handleError call count mismatch")
 
 			if tt.expectedSwitchErr {
diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go
index 2e1b0ba4..fb2dd1a8 100644
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -243,6 +243,12 @@ var (
 	}
 )
 
+// systemBlockFilterPrefixes 需要从 system 中过滤的文本前缀列表
+// OAuth/SetupToken 账号转发时，匹配这些前缀的 system 元素会被移除
+var systemBlockFilterPrefixes = []string{
+	"x-anthropic-billing-header",
+}
+
 // ErrClaudeCodeOnly 表示分组仅允许 Claude Code 客户端访问
 var ErrClaudeCodeOnly = errors.New("this group only allows Claude Code clients")
 
@@ -1687,6 +1693,17 @@ func (s *GatewayService) listSchedulableAccounts(ctx context.Context, groupID *i
 	return accounts, useMixed, nil
 }
 
+// IsSingleAntigravityAccountGroup 检查指定分组是否只有一个 antigravity 平台的可调度账号。
+// 用于 Handler 层在首次请求时提前设置 SingleAccountRetry context，
+// 避免单账号分组收到 503 时错误地设置模型限流标记导致后续请求连续快速失败。
+func (s *GatewayService) IsSingleAntigravityAccountGroup(ctx context.Context, groupID *int64) bool {
+	accounts, _, err := s.listSchedulableAccounts(ctx, groupID, PlatformAntigravity, true)
+	if err != nil {
+		return false
+	}
+	return len(accounts) == 1
+}
+
 func (s *GatewayService) isAccountAllowedForPlatform(account *Account, platform string, useMixed bool) bool {
 	if account == nil {
 		return false
@@ -2702,6 +2719,60 @@ func hasClaudeCodePrefix(text string) bool {
 	return false
 }
 
+// matchesFilterPrefix 检查文本是否匹配任一过滤前缀
+func matchesFilterPrefix(text string) bool {
+	for _, prefix := range systemBlockFilterPrefixes {
+		if strings.HasPrefix(text, prefix) {
+			return true
+		}
+	}
+	return false
+}
+
+// filterSystemBlocksByPrefix 从 body 的 system 中移除文本匹配 systemBlockFilterPrefixes 前缀的元素
+// 直接从 body 解析 system，不依赖外部传入的 parsed.System（因为前置步骤可能已修改 body 中的 system）
+func filterSystemBlocksByPrefix(body []byte) []byte {
+	sys := gjson.GetBytes(body, "system")
+	if !sys.Exists() {
+		return body
+	}
+
+	switch {
+	case sys.Type == gjson.String:
+		if matchesFilterPrefix(sys.Str) {
+			result, err := sjson.DeleteBytes(body, "system")
+			if err != nil {
+				return body
+			}
+			return result
+		}
+	case sys.IsArray():
+		var parsed []any
+		if err := json.Unmarshal([]byte(sys.Raw), &parsed); err != nil {
+			return body
+		}
+		filtered := make([]any, 0, len(parsed))
+		changed := false
+		for _, item := range parsed {
+			if m, ok := item.(map[string]any); ok {
+				if text, ok := m["text"].(string); ok && matchesFilterPrefix(text) {
+					changed = true
+					continue
+				}
+			}
+			filtered = append(filtered, item)
+		}
+		if changed {
+			result, err := sjson.SetBytes(body, "system", filtered)
+			if err != nil {
+				return body
+			}
+			return result
+		}
+	}
+	return body
+}
+
 // injectClaudeCodePrompt 在 system 开头注入 Claude Code 提示词
 // 处理 null、字符串、数组三种格式
 func injectClaudeCodePrompt(body []byte, system any) []byte {
@@ -2981,6 +3052,12 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 		body, reqModel = normalizeClaudeOAuthRequestBody(body, reqModel, normalizeOpts)
 	}
 
+	// OAuth/SetupToken 账号：移除黑名单前缀匹配的 system 元素（如客户端注入的计费元数据）
+	// 放在 inject/normalize 之后，确保不会被覆盖
+	if account.IsOAuth() {
+		body = filterSystemBlocksByPrefix(body)
+	}
+
 	// 强制执行 cache_control 块数量限制（最多 4 个）
 	body = enforceCacheControlLimit(body)
 
diff --git a/backend/internal/service/gemini_messages_compat_service.go b/backend/internal/service/gemini_messages_compat_service.go
index d9068a23..d2b0ac0d 100644
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -771,6 +771,14 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
 			break
 		}
 
+		// 错误策略优先：匹配则跳过重试直接处理。
+		if matched, rebuilt := s.checkErrorPolicyInLoop(ctx, account, resp); matched {
+			resp = rebuilt
+			break
+		} else {
+			resp = rebuilt
+		}
+
 		if resp.StatusCode >= 400 && s.shouldRetryGeminiUpstreamError(account, resp.StatusCode) {
 			respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
 			_ = resp.Body.Close()
@@ -840,7 +848,7 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
 				if upstreamReqID == "" {
 					upstreamReqID = resp.Header.Get("x-goog-request-id")
 				}
-				return nil, s.writeGeminiMappedError(c, account, resp.StatusCode, upstreamReqID, respBody)
+				return nil, s.writeGeminiMappedError(c, account, http.StatusInternalServerError, upstreamReqID, respBody)
 			case ErrorPolicyMatched, ErrorPolicyTempUnscheduled:
 				s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
 				upstreamReqID := resp.Header.Get(requestIDHeader)
@@ -1178,6 +1186,14 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 			return nil, s.writeGoogleError(c, http.StatusBadGateway, "Upstream request failed after retries: "+safeErr)
 		}
 
+		// 错误策略优先：匹配则跳过重试直接处理。
+		if matched, rebuilt := s.checkErrorPolicyInLoop(ctx, account, resp); matched {
+			resp = rebuilt
+			break
+		} else {
+			resp = rebuilt
+		}
+
 		if resp.StatusCode >= 400 && s.shouldRetryGeminiUpstreamError(account, resp.StatusCode) {
 			respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
 			_ = resp.Body.Close()
@@ -1285,7 +1301,7 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 				if contentType == "" {
 					contentType = "application/json"
 				}
-				c.Data(resp.StatusCode, contentType, respBody)
+				c.Data(http.StatusInternalServerError, contentType, respBody)
 				return nil, fmt.Errorf("gemini upstream error: %d (skipped by error policy)", resp.StatusCode)
 			case ErrorPolicyMatched, ErrorPolicyTempUnscheduled:
 				s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
@@ -1427,6 +1443,26 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 	}, nil
 }
 
+// checkErrorPolicyInLoop 在重试循环内预检查错误策略。
+// 返回 true 表示策略已匹配（调用者应 break），resp 已重建可直接使用。
+// 返回 false 表示 ErrorPolicyNone，resp 已重建，调用者继续走重试逻辑。
+func (s *GeminiMessagesCompatService) checkErrorPolicyInLoop(
+	ctx context.Context, account *Account, resp *http.Response,
+) (matched bool, rebuilt *http.Response) {
+	if resp.StatusCode < 400 || s.rateLimitService == nil {
+		return false, resp
+	}
+	body, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
+	_ = resp.Body.Close()
+	rebuilt = &http.Response{
+		StatusCode: resp.StatusCode,
+		Header:     resp.Header.Clone(),
+		Body:       io.NopCloser(bytes.NewReader(body)),
+	}
+	policy := s.rateLimitService.CheckErrorPolicy(ctx, account, resp.StatusCode, body)
+	return policy != ErrorPolicyNone, rebuilt
+}
+
 func (s *GeminiMessagesCompatService) shouldRetryGeminiUpstreamError(account *Account, statusCode int) bool {
 	switch statusCode {
 	case 429, 500, 502, 503, 504, 529:
@@ -2576,6 +2612,10 @@ func asInt(v any) (int, bool) {
 }
 
 func (s *GeminiMessagesCompatService) handleGeminiUpstreamError(ctx context.Context, account *Account, statusCode int, headers http.Header, body []byte) {
+	// 遵守自定义错误码策略：未命中则跳过所有限流处理
+	if !account.ShouldHandleErrorCode(statusCode) {
+		return
+	}
 	if s.rateLimitService != nil && (statusCode == 401 || statusCode == 403 || statusCode == 529) {
 		s.rateLimitService.HandleUpstreamError(ctx, account, statusCode, headers, body)
 		return
diff --git a/backend/internal/service/ratelimit_service.go b/backend/internal/service/ratelimit_service.go
index 63732dee..12c48ab8 100644
--- a/backend/internal/service/ratelimit_service.go
+++ b/backend/internal/service/ratelimit_service.go
@@ -623,6 +623,10 @@ func (s *RateLimitService) ClearTempUnschedulable(ctx context.Context, accountID
 			slog.Warn("temp_unsched_cache_delete_failed", "account_id", accountID, "error", err)
 		}
 	}
+	// 同时清除模型级别限流
+	if err := s.accountRepo.ClearModelRateLimits(ctx, accountID); err != nil {
+		slog.Warn("clear_model_rate_limits_on_temp_unsched_reset_failed", "account_id", accountID, "error", err)
+	}
 	return nil
 }
 
diff --git a/frontend/package.json b/frontend/package.json
index 325eba60..1b380b17 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -17,7 +17,7 @@
   "dependencies": {
     "@lobehub/icons": "^4.0.2",
     "@vueuse/core": "^10.7.0",
-    "axios": "^1.6.2",
+    "axios": "^1.13.5",
     "chart.js": "^4.4.1",
     "dompurify": "^3.3.1",
     "driver.js": "^1.4.0",
diff --git a/frontend/pnpm-lock.yaml b/frontend/pnpm-lock.yaml
index 9af2d7af..37c384b4 100644
--- a/frontend/pnpm-lock.yaml
+++ b/frontend/pnpm-lock.yaml
@@ -15,8 +15,8 @@ importers:
         specifier: ^10.7.0
         version: 10.11.1(vue@3.5.26(typescript@5.6.3))
       axios:
-        specifier: ^1.6.2
-        version: 1.13.2
+        specifier: ^1.13.5
+        version: 1.13.5
       chart.js:
         specifier: ^4.4.1
         version: 4.5.1
@@ -1257,56 +1257,67 @@ packages:
     resolution: {integrity: sha512-EHMUcDwhtdRGlXZsGSIuXSYwD5kOT9NVnx9sqzYiwAc91wfYOE1g1djOEDseZJKKqtHAHGwnGPQu3kytmfaXLQ==}
     cpu: [arm]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-arm-musleabihf@4.54.0':
     resolution: {integrity: sha512-+pBrqEjaakN2ySv5RVrj/qLytYhPKEUwk+e3SFU5jTLHIcAtqh2rLrd/OkbNuHJpsBgxsD8ccJt5ga/SeG0JmA==}
     cpu: [arm]
     os: [linux]
+    libc: [musl]
 
   '@rollup/rollup-linux-arm64-gnu@4.54.0':
     resolution: {integrity: sha512-NSqc7rE9wuUaRBsBp5ckQ5CVz5aIRKCwsoa6WMF7G01sX3/qHUw/z4pv+D+ahL1EIKy6Enpcnz1RY8pf7bjwng==}
     cpu: [arm64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-arm64-musl@4.54.0':
     resolution: {integrity: sha512-gr5vDbg3Bakga5kbdpqx81m2n9IX8M6gIMlQQIXiLTNeQW6CucvuInJ91EuCJ/JYvc+rcLLsDFcfAD1K7fMofg==}
     cpu: [arm64]
     os: [linux]
+    libc: [musl]
 
   '@rollup/rollup-linux-loong64-gnu@4.54.0':
     resolution: {integrity: sha512-gsrtB1NA3ZYj2vq0Rzkylo9ylCtW/PhpLEivlgWe0bpgtX5+9j9EZa0wtZiCjgu6zmSeZWyI/e2YRX1URozpIw==}
     cpu: [loong64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-ppc64-gnu@4.54.0':
     resolution: {integrity: sha512-y3qNOfTBStmFNq+t4s7Tmc9hW2ENtPg8FeUD/VShI7rKxNW7O4fFeaYbMsd3tpFlIg1Q8IapFgy7Q9i2BqeBvA==}
     cpu: [ppc64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-riscv64-gnu@4.54.0':
     resolution: {integrity: sha512-89sepv7h2lIVPsFma8iwmccN7Yjjtgz0Rj/Ou6fEqg3HDhpCa+Et+YSufy27i6b0Wav69Qv4WBNl3Rs6pwhebQ==}
     cpu: [riscv64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-riscv64-musl@4.54.0':
     resolution: {integrity: sha512-ZcU77ieh0M2Q8Ur7D5X7KvK+UxbXeDHwiOt/CPSBTI1fBmeDMivW0dPkdqkT4rOgDjrDDBUed9x4EgraIKoR2A==}
     cpu: [riscv64]
     os: [linux]
+    libc: [musl]
 
   '@rollup/rollup-linux-s390x-gnu@4.54.0':
     resolution: {integrity: sha512-2AdWy5RdDF5+4YfG/YesGDDtbyJlC9LHmL6rZw6FurBJ5n4vFGupsOBGfwMRjBYH7qRQowT8D/U4LoSvVwOhSQ==}
     cpu: [s390x]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-x64-gnu@4.54.0':
     resolution: {integrity: sha512-WGt5J8Ij/rvyqpFexxk3ffKqqbLf9AqrTBbWDk7ApGUzaIs6V+s2s84kAxklFwmMF/vBNGrVdYgbblCOFFezMQ==}
     cpu: [x64]
     os: [linux]
+    libc: [glibc]
 
   '@rollup/rollup-linux-x64-musl@4.54.0':
     resolution: {integrity: sha512-JzQmb38ATzHjxlPHuTH6tE7ojnMKM2kYNzt44LO/jJi8BpceEC8QuXYA908n8r3CNuG/B3BV8VR3Hi1rYtmPiw==}
     cpu: [x64]
     os: [linux]
+    libc: [musl]
 
   '@rollup/rollup-openharmony-arm64@4.54.0':
     resolution: {integrity: sha512-huT3fd0iC7jigGh7n3q/+lfPcXxBi+om/Rs3yiFxjvSxbSB6aohDFXbWvlspaqjeOh+hx7DDHS+5Es5qRkWkZg==}
@@ -1805,8 +1816,8 @@ packages:
     peerDependencies:
       postcss: ^8.1.0
 
-  axios@1.13.2:
-    resolution: {integrity: sha512-VPk9ebNqPcy5lRGuSlKx752IlDatOjT9paPlm8A7yOuW2Fbvp4X3JznJtT4f0GzGLLiWE9W8onz51SqLYwzGaA==}
+  axios@1.13.5:
+    resolution: {integrity: sha512-cz4ur7Vb0xS4/KUN0tPWe44eqxrIu31me+fbang3ijiNscE129POzipJJA6zniq2C/Z6sJCjMimjS8Lc/GAs8Q==}
 
   babel-plugin-macros@3.1.0:
     resolution: {integrity: sha512-Cg7TFGpIr01vOQNODXOOaGz2NpCU5gl8x1qJFbb6hbZxR7XrcE2vtbAsTAbJ7/xwJtUuJEw8K8Zr/AE0LHlesg==}
@@ -6387,7 +6398,7 @@ snapshots:
       postcss: 8.5.6
       postcss-value-parser: 4.2.0
 
-  axios@1.13.2:
+  axios@1.13.5:
     dependencies:
       follow-redirects: 1.15.11
       form-data: 4.0.5
diff --git a/frontend/src/components/account/CreateAccountModal.vue b/frontend/src/components/account/CreateAccountModal.vue
index 0e294ee6..a7290cbf 100644
--- a/frontend/src/components/account/CreateAccountModal.vue
+++ b/frontend/src/components/account/CreateAccountModal.vue
@@ -665,8 +665,8 @@
               <Icon name="cloud" size="sm" />
             </div>
             <div>
-              <span class="block text-sm font-medium text-gray-900 dark:text-white">{{ t('admin.accounts.types.upstream') }}</span>
-              <span class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.accounts.types.upstreamDesc') }}</span>
+              <span class="block text-sm font-medium text-gray-900 dark:text-white">API Key</span>
+              <span class="text-xs text-gray-500 dark:text-gray-400">{{ t('admin.accounts.types.antigravityApikey') }}</span>
             </div>
           </button>
         </div>
@@ -681,7 +681,7 @@
             type="text"
             required
             class="input"
-            placeholder="https://s.konstants.xyz"
+            placeholder="https://cloudcode-pa.googleapis.com"
           />
           <p class="input-hint">{{ t('admin.accounts.upstream.baseUrlHint') }}</p>
         </div>
@@ -816,8 +816,8 @@
         </div>
       </div>
 
-      <!-- API Key input (only for apikey type) -->
-      <div v-if="form.type === 'apikey'" class="space-y-4">
+      <!-- API Key input (only for apikey type, excluding Antigravity which has its own fields) -->
+      <div v-if="form.type === 'apikey' && form.platform !== 'antigravity'" class="space-y-4">
         <div>
           <label class="input-label">{{ t('admin.accounts.baseUrl') }}</label>
           <input
@@ -862,7 +862,7 @@
           <p class="input-hint">{{ t('admin.accounts.gemini.tier.aiStudioHint') }}</p>
         </div>
 
-        <!-- Model Restriction Section (不适用于 Gemini) -->
+        <!-- Model Restriction Section (不适用于 Gemini，Antigravity 已在上层条件排除) -->
         <div v-if="form.platform !== 'gemini'" class="border-t border-gray-200 pt-4 dark:border-dark-600">
           <label class="input-label">{{ t('admin.accounts.modelRestriction') }}</label>
 
diff --git a/frontend/src/components/account/EditAccountModal.vue b/frontend/src/components/account/EditAccountModal.vue
index 986bd297..60575f56 100644
--- a/frontend/src/components/account/EditAccountModal.vue
+++ b/frontend/src/components/account/EditAccountModal.vue
@@ -39,7 +39,9 @@
                 ? 'https://api.openai.com'
                 : account.platform === 'gemini'
                   ? 'https://generativelanguage.googleapis.com'
-                  : 'https://api.anthropic.com'
+                  : account.platform === 'antigravity'
+                    ? 'https://cloudcode-pa.googleapis.com'
+                    : 'https://api.anthropic.com'
             "
           />
           <p class="input-hint">{{ baseUrlHint }}</p>
@@ -55,14 +57,16 @@
                 ? 'sk-proj-...'
                 : account.platform === 'gemini'
                   ? 'AIza...'
-                  : 'sk-ant-...'
+                  : account.platform === 'antigravity'
+                    ? 'sk-...'
+                    : 'sk-ant-...'
             "
           />
           <p class="input-hint">{{ t('admin.accounts.leaveEmptyToKeep') }}</p>
         </div>
 
-        <!-- Model Restriction Section (不适用于 Gemini) -->
-        <div v-if="account.platform !== 'gemini'" class="border-t border-gray-200 pt-4 dark:border-dark-600">
+        <!-- Model Restriction Section (不适用于 Gemini 和 Antigravity) -->
+        <div v-if="account.platform !== 'gemini' && account.platform !== 'antigravity'" class="border-t border-gray-200 pt-4 dark:border-dark-600">
           <label class="input-label">{{ t('admin.accounts.modelRestriction') }}</label>
 
           <!-- Mode Toggle -->
@@ -372,7 +376,7 @@
             v-model="editBaseUrl"
             type="text"
             class="input"
-            placeholder="https://s.konstants.xyz"
+            placeholder="https://cloudcode-pa.googleapis.com"
           />
           <p class="input-hint">{{ t('admin.accounts.upstream.baseUrlHint') }}</p>
         </div>
diff --git a/frontend/src/components/admin/account/AccountActionMenu.vue b/frontend/src/components/admin/account/AccountActionMenu.vue
index bb753faa..2325f4b4 100644
--- a/frontend/src/components/admin/account/AccountActionMenu.vue
+++ b/frontend/src/components/admin/account/AccountActionMenu.vue
@@ -53,7 +53,19 @@ import type { Account } from '@/types'
 const props = defineProps<{ show: boolean; account: Account | null; position: { top: number; left: number } | null }>()
 const emit = defineEmits(['close', 'test', 'stats', 'reauth', 'refresh-token', 'reset-status', 'clear-rate-limit'])
 const { t } = useI18n()
-const isRateLimited = computed(() => props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date())
+const isRateLimited = computed(() => {
+  if (props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date()) {
+    return true
+  }
+  const modelLimits = (props.account?.extra as Record<string, unknown> | undefined)?.model_rate_limits as
+    | Record<string, { rate_limit_reset_at: string }>
+    | undefined
+  if (modelLimits) {
+    const now = new Date()
+    return Object.values(modelLimits).some(info => new Date(info.rate_limit_reset_at) > now)
+  }
+  return false
+})
 const isOverloaded = computed(() => props.account?.overload_until && new Date(props.account.overload_until) > new Date())
 
 const handleKeydown = (event: KeyboardEvent) => {
diff --git a/frontend/src/i18n/locales/en.ts b/frontend/src/i18n/locales/en.ts
index a04cbdf1..3d99df33 100644
--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -1369,6 +1369,7 @@ export default {
         googleOauth: 'Google OAuth',
         codeAssist: 'Code Assist',
         antigravityOauth: 'Antigravity OAuth',
+        antigravityApikey: 'Connect via Base URL + API Key',
         upstream: 'Upstream',
         upstreamDesc: 'Connect via Base URL + API Key'
       },
@@ -1642,7 +1643,7 @@ export default {
       // Upstream type
       upstream: {
         baseUrl: 'Upstream Base URL',
-        baseUrlHint: 'The address of the upstream Antigravity service, e.g., https://s.konstants.xyz',
+        baseUrlHint: 'The address of the upstream Antigravity service, e.g., https://cloudcode-pa.googleapis.com',
         apiKey: 'Upstream API Key',
         apiKeyHint: 'API Key for the upstream service',
         pleaseEnterBaseUrl: 'Please enter upstream Base URL',
diff --git a/frontend/src/i18n/locales/zh.ts b/frontend/src/i18n/locales/zh.ts
index 22d90ee2..bed7f9b9 100644
--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -1503,6 +1503,7 @@ export default {
         googleOauth: 'Google OAuth',
         codeAssist: 'Code Assist',
         antigravityOauth: 'Antigravity OAuth',
+        antigravityApikey: '通过 Base URL + API Key 连接',
         upstream: '对接上游',
         upstreamDesc: '通过 Base URL + API Key 连接上游',
         api_key: 'API Key',
@@ -1788,7 +1789,7 @@ export default {
       // Upstream type
       upstream: {
         baseUrl: '上游 Base URL',
-        baseUrlHint: '上游 Antigravity 服务的地址，例如：https://s.konstants.xyz',
+        baseUrlHint: '上游 Antigravity 服务的地址，例如：https://cloudcode-pa.googleapis.com',
         apiKey: '上游 API Key',
         apiKeyHint: '上游服务的 API Key',
         pleaseEnterBaseUrl: '请输入上游 Base URL',