feat(antigravity): comprehensive enhancements - model mapping, rate limiting, scheduling & ops

Key changes:
- Upgrade model mapping: Opus 4.5 → Opus 4.6-thinking with precise matching
- Unified rate limiting: scope-level → model-level with Redis snapshot sync
- Load-balanced scheduling by call count with smart retry mechanism
- Force cache billing support
- Model identity injection in prompts with leak prevention
- Thinking mode auto-handling (max_tokens/budget_tokens fix)
- Frontend: whitelist mode toggle, model mapping validation, status indicators
- Gemini session fallback with Redis Trie O(L) matching
- Ops: enhanced concurrency monitoring, account availability, retry logic
- Migration scripts: 049-051 for model mapping unification
2026-02-07 12:31:10 +08:00
parent e617b45ba3
commit 5e98445b22
73 changed files with 8553 additions and 1926 deletions
--- a/backend/internal/service/antigravity_rate_limit_test.go
+++ b/backend/internal/service/antigravity_rate_limit_test.go
@@ -21,6 +21,23 @@ type stubAntigravityUpstream struct {
 	calls      []string
 }

+type recordingOKUpstream struct {
+	calls int // incremented on every Do call (DoWithTLS delegates to Do)
+}
+
+func (r *recordingOKUpstream) Do(req *http.Request, proxyURL string, accountID int64, accountConcurrency int) (*http.Response, error) {
+	r.calls++ // count the invocation; the request arguments are not inspected
+	return &http.Response{
+		StatusCode: http.StatusOK,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader("ok")),
+	}, nil
+}
+
+func (r *recordingOKUpstream) DoWithTLS(req *http.Request, proxyURL string, accountID int64, accountConcurrency int, enableTLSFingerprint bool) (*http.Response, error) {
+	return r.Do(req, proxyURL, accountID, accountConcurrency) // enableTLSFingerprint is ignored by this stub
+}
+
 func (s *stubAntigravityUpstream) Do(req *http.Request, proxyURL string, accountID int64, accountConcurrency int) (*http.Response, error) {
 	url := req.URL.String()
 	s.calls = append(s.calls, url)
@@ -53,10 +70,17 @@ type rateLimitCall struct {
 	resetAt   time.Time
 }

+type modelRateLimitCall struct {
+	accountID int64
+	modelKey  string // stored key (expected to be the official model ID, e.g. "claude-sonnet-4-5")
+	resetAt   time.Time
+}
+
 type stubAntigravityAccountRepo struct {
 	AccountRepository
-	scopeCalls []scopeLimitCall
-	rateCalls  []rateLimitCall
+	scopeCalls          []scopeLimitCall
+	rateCalls           []rateLimitCall
+	modelRateLimitCalls []modelRateLimitCall // appended to by SetModelRateLimit
 }

 func (s *stubAntigravityAccountRepo) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope AntigravityQuotaScope, resetAt time.Time) error {
@@ -69,6 +93,11 @@ func (s *stubAntigravityAccountRepo) SetRateLimited(ctx context.Context, id int6
 	return nil
 }

+func (s *stubAntigravityAccountRepo) SetModelRateLimit(ctx context.Context, id int64, modelKey string, resetAt time.Time) error {
+	s.modelRateLimitCalls = append(s.modelRateLimitCalls, modelRateLimitCall{accountID: id, modelKey: modelKey, resetAt: resetAt}) // record the call so tests can assert on it
+	return nil
+}
+
 func TestAntigravityRetryLoop_URLFallback_UsesLatestSuccess(t *testing.T) {
 	oldBaseURLs := append([]string(nil), antigravity.BaseURLs...)
 	oldAvailability := antigravity.DefaultURLAvailability
@@ -94,17 +123,19 @@ func TestAntigravityRetryLoop_URLFallback_UsesLatestSuccess(t *testing.T) {

 	var handleErrorCalled bool
 	result, err := antigravityRetryLoop(antigravityRetryLoopParams{
-		prefix:       "[test]",
-		ctx:          context.Background(),
-		account:      account,
-		proxyURL:     "",
-		accessToken:  "token",
-		action:       "generateContent",
-		body:         []byte(`{"input":"test"}`),
-		quotaScope:   AntigravityQuotaScopeClaude,
-		httpUpstream: upstream,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope) {
+		prefix:         "[test]",
+		ctx:            context.Background(),
+		account:        account,
+		proxyURL:       "",
+		accessToken:    "token",
+		action:         "generateContent",
+		body:           []byte(`{"input":"test"}`),
+		quotaScope:     AntigravityQuotaScopeClaude,
+		httpUpstream:   upstream,
+		requestedModel: "claude-sonnet-4-5",
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			handleErrorCalled = true
+			return nil
 		},
 	})

@@ -123,14 +154,14 @@ func TestAntigravityRetryLoop_URLFallback_UsesLatestSuccess(t *testing.T) {
 	require.Equal(t, base2, available[0])
 }

-func TestAntigravityHandleUpstreamError_UsesScopeLimitWhenEnabled(t *testing.T) {
-	t.Setenv(antigravityScopeRateLimitEnv, "true")
+func TestAntigravityHandleUpstreamError_UsesScopeLimit(t *testing.T) {
+	// Scope-level rate limiting is always enabled; disabling it via an environment variable is no longer supported.
 	repo := &stubAntigravityAccountRepo{}
 	svc := &AntigravityGatewayService{accountRepo: repo}
 	account := &Account{ID: 9, Name: "acc-9", Platform: PlatformAntigravity}

 	body := buildGeminiRateLimitBody("3s")
-	svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, AntigravityQuotaScopeClaude)
+	svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, AntigravityQuotaScopeClaude, 0, "", false)

 	require.Len(t, repo.scopeCalls, 1)
 	require.Empty(t, repo.rateCalls)
@@ -140,20 +171,122 @@ func TestAntigravityHandleUpstreamError_UsesScopeLimitWhenEnabled(t *testing.T)
 	require.WithinDuration(t, time.Now().Add(3*time.Second), call.resetAt, 2*time.Second)
 }

-func TestAntigravityHandleUpstreamError_UsesAccountLimitWhenScopeDisabled(t *testing.T) {
-	t.Setenv(antigravityScopeRateLimitEnv, "false")
+// TestHandleUpstreamError_429_ModelRateLimit covers the 429 model-level rate limit scenario.
+func TestHandleUpstreamError_429_ModelRateLimit(t *testing.T) {
 	repo := &stubAntigravityAccountRepo{}
 	svc := &AntigravityGatewayService{accountRepo: repo}
-	account := &Account{ID: 10, Name: "acc-10", Platform: PlatformAntigravity}
+	account := &Account{ID: 1, Name: "acc-1", Platform: PlatformAntigravity}

-	body := buildGeminiRateLimitBody("2s")
-	svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, AntigravityQuotaScopeClaude)
+	// 429 + RATE_LIMIT_EXCEEDED + model name → model-level rate limit
+	body := []byte(`{
+		"error": {
+			"status": "RESOURCE_EXHAUSTED",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "15s"}
+			]
+		}
+	}`)

-	require.Len(t, repo.rateCalls, 1)
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, AntigravityQuotaScopeClaude, 0, "", false)
+
+	// Model-level rate limiting should be triggered.
+	require.NotNil(t, result)
+	require.True(t, result.Handled)
+	require.NotNil(t, result.SwitchError)
+	require.Equal(t, "claude-sonnet-4-5", result.SwitchError.RateLimitedModel)
+	require.Len(t, repo.modelRateLimitCalls, 1)
+	require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey)
+}
+
+// TestHandleUpstreamError_429_NonModelRateLimit covers a 429 that is not a model rate limit (falls back to scope-level limiting).
+func TestHandleUpstreamError_429_NonModelRateLimit(t *testing.T) {
+	repo := &stubAntigravityAccountRepo{}
+	svc := &AntigravityGatewayService{accountRepo: repo}
+	account := &Account{ID: 2, Name: "acc-2", Platform: PlatformAntigravity}
+
+	// 429 + ordinary rate-limit response (no RATE_LIMIT_EXCEEDED reason) → scope-level rate limit
+	body := buildGeminiRateLimitBody("5s")
+
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, AntigravityQuotaScopeClaude, 0, "", false)
+
+	// Model-level rate limiting must not be triggered; scope-level limiting should be used instead.
+	require.Nil(t, result)
+	require.Empty(t, repo.modelRateLimitCalls)
+	require.Len(t, repo.scopeCalls, 1)
+	require.Equal(t, AntigravityQuotaScopeClaude, repo.scopeCalls[0].scope)
+}
+
+// TestHandleUpstreamError_503_ModelRateLimit covers the 503 model-level rate limit scenario.
+func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) {
+	repo := &stubAntigravityAccountRepo{}
+	svc := &AntigravityGatewayService{accountRepo: repo}
+	account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity}
+
+	// 503 + MODEL_CAPACITY_EXHAUSTED → model-level rate limit
+	body := []byte(`{
+		"error": {
+			"status": "UNAVAILABLE",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+				{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "30s"}
+			]
+		}
+	}`)
+
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, AntigravityQuotaScopeGeminiText, 0, "", false)
+
+	// Model-level rate limiting should be triggered.
+	require.NotNil(t, result)
+	require.True(t, result.Handled)
+	require.NotNil(t, result.SwitchError)
+	require.Equal(t, "gemini-3-pro-high", result.SwitchError.RateLimitedModel)
+	require.Len(t, repo.modelRateLimitCalls, 1)
+	require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey)
+}
+
+// TestHandleUpstreamError_503_NonModelRateLimit covers a 503 that is not a model rate limit (no handling expected).
+func TestHandleUpstreamError_503_NonModelRateLimit(t *testing.T) {
+	repo := &stubAntigravityAccountRepo{}
+	svc := &AntigravityGatewayService{accountRepo: repo}
+	account := &Account{ID: 4, Name: "acc-4", Platform: PlatformAntigravity}
+
+	// 503 + ordinary error (not MODEL_CAPACITY_EXHAUSTED) → nothing is done
+	body := []byte(`{
+		"error": {
+			"status": "UNAVAILABLE",
+			"message": "Service temporarily unavailable",
+			"details": [
+				{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "reason": "SERVICE_UNAVAILABLE"}
+			]
+		}
+	}`)
+
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, AntigravityQuotaScopeGeminiText, 0, "", false)
+
+	// A 503 that is not a model rate limit must not cause any handling.
+	require.Nil(t, result)
+	require.Empty(t, repo.modelRateLimitCalls, "503 non-model rate limit should not trigger model rate limit")
+	require.Empty(t, repo.scopeCalls, "503 non-model rate limit should not trigger scope rate limit")
+	require.Empty(t, repo.rateCalls, "503 non-model rate limit should not trigger account rate limit")
+}
+
+// TestHandleUpstreamError_503_EmptyBody covers a 503 with an empty response body (no handling expected).
+func TestHandleUpstreamError_503_EmptyBody(t *testing.T) {
+	repo := &stubAntigravityAccountRepo{}
+	svc := &AntigravityGatewayService{accountRepo: repo}
+	account := &Account{ID: 5, Name: "acc-5", Platform: PlatformAntigravity}
+
+	// 503 + empty response body (an empty JSON object) → nothing is done
+	body := []byte(`{}`)
+
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, AntigravityQuotaScopeGeminiText, 0, "", false)
+
+	// A 503 with an empty response must not cause any handling.
+	require.Nil(t, result)
+	require.Empty(t, repo.modelRateLimitCalls)
 	require.Empty(t, repo.scopeCalls)
-	call := repo.rateCalls[0]
-	require.Equal(t, account.ID, call.accountID)
-	require.WithinDuration(t, time.Now().Add(2*time.Second), call.resetAt, 2*time.Second)
+	require.Empty(t, repo.rateCalls)
 }

 func TestAccountIsSchedulableForModel_AntigravityRateLimits(t *testing.T) {
@@ -188,3 +321,751 @@ func TestAccountIsSchedulableForModel_AntigravityRateLimits(t *testing.T) {
 func buildGeminiRateLimitBody(delay string) []byte {
 	return []byte(fmt.Sprintf(`{"error":{"message":"too many requests","details":[{"metadata":{"quotaResetDelay":%q}}]}}`, delay))
 }
+
+func TestParseGeminiRateLimitResetTime_QuotaResetDelay_RoundsUp(t *testing.T) {
+	// Avoid flakiness around Unix second boundaries: spin until less than 800ms of the current second has elapsed.
+	for {
+		now := time.Now()
+		if now.Nanosecond() < 800*1e6 {
+			break
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+
+	baseUnix := time.Now().Unix()
+	ts := ParseGeminiRateLimitResetTime(buildGeminiRateLimitBody("0.1s"))
+	require.NotNil(t, ts)
+	require.Equal(t, baseUnix+1, *ts, "fractional seconds should be rounded up to the next second")
+}
+
+func TestParseAntigravitySmartRetryInfo(t *testing.T) {
+	tests := []struct {
+		name          string
+		body          string
+		expectedDelay time.Duration
+		expectedModel string
+		expectedNil   bool
+	}{
+		{
+			name: "valid complete response with RATE_LIMIT_EXCEEDED",
+			body: `{
+				"error": {
+					"code": 429,
+					"details": [
+						{
+							"@type": "type.googleapis.com/google.rpc.ErrorInfo",
+							"domain": "cloudcode-pa.googleapis.com",
+							"metadata": {
+								"model": "claude-sonnet-4-5",
+								"quotaResetDelay": "201.506475ms"
+							},
+							"reason": "RATE_LIMIT_EXCEEDED"
+						},
+						{
+							"@type": "type.googleapis.com/google.rpc.RetryInfo",
+							"retryDelay": "0.201506475s"
+						}
+					],
+					"message": "You have exhausted your capacity on this model.",
+					"status": "RESOURCE_EXHAUSTED"
+				}
+			}`,
+			expectedDelay: 201506475 * time.Nanosecond,
+			expectedModel: "claude-sonnet-4-5",
+		},
+		{
+			name: "429 RESOURCE_EXHAUSTED without RATE_LIMIT_EXCEEDED - should return nil",
+			body: `{
+				"error": {
+					"code": 429,
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{
+							"@type": "type.googleapis.com/google.rpc.ErrorInfo",
+							"metadata": {"model": "claude-sonnet-4-5"},
+							"reason": "QUOTA_EXCEEDED"
+						},
+						{
+							"@type": "type.googleapis.com/google.rpc.RetryInfo",
+							"retryDelay": "3s"
+						}
+					]
+				}
+			}`,
+			expectedNil: true,
+		},
+		{
+			name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - long delay",
+			body: `{
+				"error": {
+					"code": 503,
+					"status": "UNAVAILABLE",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
+					],
+					"message": "No capacity available for model gemini-3-pro-high on the server"
+				}
+			}`,
+			expectedDelay: 39 * time.Second,
+			expectedModel: "gemini-3-pro-high",
+		},
+		{
+			name: "503 UNAVAILABLE without MODEL_CAPACITY_EXHAUSTED - should return nil",
+			body: `{
+				"error": {
+					"code": 503,
+					"status": "UNAVAILABLE",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-pro"}, "reason": "SERVICE_UNAVAILABLE"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "5s"}
+					]
+				}
+			}`,
+			expectedNil: true,
+		},
+		{
+			name: "wrong status - should return nil",
+			body: `{
+				"error": {
+					"code": 429,
+					"status": "INVALID_ARGUMENT",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "3s"}
+					]
+				}
+			}`,
+			expectedNil: true,
+		},
+		{
+			name: "missing status - should return nil",
+			body: `{
+				"error": {
+					"code": 429,
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "3s"}
+					]
+				}
+			}`,
+			expectedNil: true,
+		},
+		{
+			name: "milliseconds format is now supported",
+			body: `{
+				"error": {
+					"code": 429,
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "test-model"}, "reason": "RATE_LIMIT_EXCEEDED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "500ms"}
+					]
+				}
+			}`,
+			expectedDelay: 500 * time.Millisecond,
+			expectedModel: "test-model",
+		},
+		{
+			name: "minutes format is supported",
+			body: `{
+				"error": {
+					"code": 429,
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "4m50s"}
+					]
+				}
+			}`,
+			expectedDelay: 4*time.Minute + 50*time.Second,
+			expectedModel: "gemini-3-pro",
+		},
+		{
+			name: "missing model name - should return nil",
+			body: `{
+				"error": {
+					"code": 429,
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "reason": "RATE_LIMIT_EXCEEDED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "3s"}
+					]
+				}
+			}`,
+			expectedNil: true,
+		},
+		{
+			name:        "invalid JSON",
+			body:        `not json`,
+			expectedNil: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := parseAntigravitySmartRetryInfo([]byte(tt.body))
+			if tt.expectedNil {
+				if result != nil {
+					t.Errorf("expected nil, got %+v", result)
+				}
+				return
+			}
+			if result == nil {
+				t.Errorf("expected non-nil result")
+				return
+			}
+			if result.RetryDelay != tt.expectedDelay {
+				t.Errorf("RetryDelay = %v, want %v", result.RetryDelay, tt.expectedDelay)
+			}
+			if result.ModelName != tt.expectedModel {
+				t.Errorf("ModelName = %q, want %q", result.ModelName, tt.expectedModel)
+			}
+		})
+	}
+}
+
+func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
+	oauthAccount := &Account{Type: AccountTypeOAuth}
+	setupTokenAccount := &Account{Type: AccountTypeSetupToken}
+	apiKeyAccount := &Account{Type: AccountTypeAPIKey}
+
+	tests := []struct {
+		name                    string
+		account                 *Account
+		body                    string
+		expectedShouldRetry     bool
+		expectedShouldRateLimit bool
+		minWait                 time.Duration
+		modelName               string
+	}{
+		{
+			name:    "OAuth account with short delay (< 7s) - smart retry",
+			account: oauthAccount,
+			body: `{
+				"error": {
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-opus-4"}, "reason": "RATE_LIMIT_EXCEEDED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
+					]
+				}
+			}`,
+			expectedShouldRetry:     true,
+			expectedShouldRateLimit: false,
+			minWait:                 1 * time.Second, // 0.5s < 1s, so the minimum wait of 1s is used
+			modelName:               "claude-opus-4",
+		},
+		{
+			name:    "SetupToken account with short delay - smart retry",
+			account: setupTokenAccount,
+			body: `{
+				"error": {
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-flash"}, "reason": "RATE_LIMIT_EXCEEDED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "3s"}
+					]
+				}
+			}`,
+			expectedShouldRetry:     true,
+			expectedShouldRateLimit: false,
+			minWait:                 3 * time.Second,
+			modelName:               "gemini-3-flash",
+		},
+		{
+			name:    "OAuth account with long delay (>= 7s) - direct rate limit",
+			account: oauthAccount,
+			body: `{
+				"error": {
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "15s"}
+					]
+				}
+			}`,
+			expectedShouldRetry:     false,
+			expectedShouldRateLimit: true,
+			modelName:               "claude-sonnet-4-5",
+		},
+		{
+			name:    "API Key account - should not trigger",
+			account: apiKeyAccount,
+			body: `{
+				"error": {
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "test"}, "reason": "RATE_LIMIT_EXCEEDED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
+					]
+				}
+			}`,
+			expectedShouldRetry:     false,
+			expectedShouldRateLimit: false,
+		},
+		{
+			name:    "OAuth account with exactly 7s delay - direct rate limit",
+			account: oauthAccount,
+			body: `{
+				"error": {
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-pro"}, "reason": "RATE_LIMIT_EXCEEDED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "7s"}
+					]
+				}
+			}`,
+			expectedShouldRetry:     false,
+			expectedShouldRateLimit: true,
+			modelName:               "gemini-pro",
+		},
+		{
+			name:    "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - long delay",
+			account: oauthAccount,
+			body: `{
+				"error": {
+					"code": 503,
+					"status": "UNAVAILABLE",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
+						{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
+					]
+				}
+			}`,
+			expectedShouldRetry:     false,
+			expectedShouldRateLimit: true,
+			modelName:               "gemini-3-pro-high",
+		},
+		{
+			name:    "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use default rate limit",
+			account: oauthAccount,
+			body: `{
+				"error": {
+					"code": 503,
+					"status": "UNAVAILABLE",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-2.5-flash"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}
+					],
+					"message": "No capacity available for model gemini-2.5-flash on the server"
+				}
+			}`,
+			expectedShouldRetry:     false,
+			expectedShouldRateLimit: true,
+			modelName:               "gemini-2.5-flash",
+		},
+		{
+			name:    "429 RESOURCE_EXHAUSTED with RATE_LIMIT_EXCEEDED - no retryDelay - use default rate limit",
+			account: oauthAccount,
+			body: `{
+				"error": {
+					"code": 429,
+					"status": "RESOURCE_EXHAUSTED",
+					"details": [
+						{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "claude-sonnet-4-5"}, "reason": "RATE_LIMIT_EXCEEDED"}
+					],
+					"message": "You have exhausted your capacity on this model."
+				}
+			}`,
+			expectedShouldRetry:     false,
+			expectedShouldRateLimit: true,
+			modelName:               "claude-sonnet-4-5",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			shouldRetry, shouldRateLimit, wait, model := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body))
+			if shouldRetry != tt.expectedShouldRetry {
+				t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry)
+			}
+			if shouldRateLimit != tt.expectedShouldRateLimit {
+				t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit)
+			}
+			if shouldRetry {
+				if wait < tt.minWait {
+					t.Errorf("wait = %v, want >= %v", wait, tt.minWait)
+				}
+			}
+			if (shouldRetry || shouldRateLimit) && model != tt.modelName {
+				t.Errorf("modelName = %q, want %q", model, tt.modelName)
+			}
+		})
+	}
+}
+
+// TestSetModelRateLimitByModelName_UsesOfficialModelID verifies that the write path stores the official model ID.
+func TestSetModelRateLimitByModelName_UsesOfficialModelID(t *testing.T) {
+	tests := []struct {
+		name             string
+		modelName        string
+		expectedModelKey string
+		expectedSuccess  bool
+	}{
+		{
+			name:             "claude-sonnet-4-5 should be stored as-is",
+			modelName:        "claude-sonnet-4-5",
+			expectedModelKey: "claude-sonnet-4-5",
+			expectedSuccess:  true,
+		},
+		{
+			name:             "gemini-3-pro-high should be stored as-is",
+			modelName:        "gemini-3-pro-high",
+			expectedModelKey: "gemini-3-pro-high",
+			expectedSuccess:  true,
+		},
+		{
+			name:             "gemini-3-flash should be stored as-is",
+			modelName:        "gemini-3-flash",
+			expectedModelKey: "gemini-3-flash",
+			expectedSuccess:  true,
+		},
+		{
+			name:             "empty model name should fail",
+			modelName:        "",
+			expectedModelKey: "",
+			expectedSuccess:  false,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			repo := &stubAntigravityAccountRepo{}
+			resetAt := time.Now().Add(30 * time.Second)
+
+			success := setModelRateLimitByModelName(
+				context.Background(),
+				repo,
+				123, // accountID
+				tt.modelName,
+				"[test]",
+				429,
+				resetAt,
+				false, // afterSmartRetry
+			)
+
+			require.Equal(t, tt.expectedSuccess, success)
+
+			if tt.expectedSuccess {
+				require.Len(t, repo.modelRateLimitCalls, 1)
+				call := repo.modelRateLimitCalls[0]
+				require.Equal(t, int64(123), call.accountID)
+				// Key assertion: the stored key must be the official model ID, not a scope.
+				require.Equal(t, tt.expectedModelKey, call.modelKey, "should store official model ID, not scope")
+				require.WithinDuration(t, resetAt, call.resetAt, time.Second)
+			} else {
+				require.Empty(t, repo.modelRateLimitCalls)
+			}
+		})
+	}
+}
+
+// TestSetModelRateLimitByModelName_NotConvertToScope verifies that the model name is not converted to a scope.
+func TestSetModelRateLimitByModelName_NotConvertToScope(t *testing.T) {
+	repo := &stubAntigravityAccountRepo{}
+	resetAt := time.Now().Add(30 * time.Second)
+
+	// Call setModelRateLimitByModelName with an official model ID.
+	success := setModelRateLimitByModelName(
+		context.Background(),
+		repo,
+		456,
+		"claude-sonnet-4-5", // official model ID
+		"[test]",
+		429,
+		resetAt,
+		true, // afterSmartRetry
+	)
+
+	require.True(t, success)
+	require.Len(t, repo.modelRateLimitCalls, 1)
+
+	call := repo.modelRateLimitCalls[0]
+	// Key assertion: the stored key must be "claude-sonnet-4-5", not "claude_sonnet".
+	require.Equal(t, "claude-sonnet-4-5", call.modelKey, "should NOT convert to scope like claude_sonnet")
+	require.NotEqual(t, "claude_sonnet", call.modelKey, "should NOT be scope")
+}
+
+func TestAntigravityRetryLoop_PreCheck_WaitsWhenRemainingBelowThreshold(t *testing.T) {
+	upstream := &recordingOKUpstream{}
+	account := &Account{
+		ID:          1,
+		Name:        "acc-1",
+		Platform:    PlatformAntigravity,
+		Schedulable: true,
+		Status:      StatusActive,
+		Concurrency: 1,
+		Extra: map[string]any{
+			modelRateLimitsKey: map[string]any{
+				"claude-sonnet-4-5": map[string]any{
+					// RFC3339 here is second-precision; keep it safely in the future.
+					"rate_limit_reset_at": time.Now().Add(2 * time.Second).Format(time.RFC3339),
+				},
+			},
+		},
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Millisecond) // short deadline; the test expects it to expire before upstream is called
+	defer cancel()
+
+	result, err := antigravityRetryLoop(antigravityRetryLoopParams{
+		ctx:             ctx,
+		prefix:          "[test]",
+		account:         account,
+		accessToken:     "token",
+		action:          "generateContent",
+		body:            []byte(`{"input":"test"}`),
+		requestedModel:  "claude-sonnet-4-5",
+		httpUpstream:    upstream,
+		isStickySession: true,
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	})
+
+	require.ErrorIs(t, err, context.DeadlineExceeded)
+	require.Nil(t, result)
+	require.Equal(t, 0, upstream.calls, "should not call upstream while waiting on pre-check")
+}
+
+func TestAntigravityRetryLoop_PreCheck_SwitchesWhenRemainingAtOrAboveThreshold(t *testing.T) {
+	upstream := &recordingOKUpstream{}
+	account := &Account{
+		ID:          2,
+		Name:        "acc-2",
+		Platform:    PlatformAntigravity,
+		Schedulable: true,
+		Status:      StatusActive,
+		Concurrency: 1,
+		Extra: map[string]any{
+			modelRateLimitsKey: map[string]any{
+				"claude-sonnet-4-5": map[string]any{
+					"rate_limit_reset_at": time.Now().Add(11 * time.Second).Format(time.RFC3339), // long remaining window; the test expects a switch error rather than a wait
+				},
+			},
+		},
+	}
+
+	result, err := antigravityRetryLoop(antigravityRetryLoopParams{
+		ctx:             context.Background(),
+		prefix:          "[test]",
+		account:         account,
+		accessToken:     "token",
+		action:          "generateContent",
+		body:            []byte(`{"input":"test"}`),
+		requestedModel:  "claude-sonnet-4-5",
+		httpUpstream:    upstream,
+		isStickySession: true,
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+			return nil
+		},
+	})
+
+	require.Nil(t, result)
+	var switchErr *AntigravityAccountSwitchError
+	require.ErrorAs(t, err, &switchErr)
+	require.Equal(t, account.ID, switchErr.OriginalAccountID)
+	require.Equal(t, "claude-sonnet-4-5", switchErr.RateLimitedModel)
+	require.True(t, switchErr.IsStickySession)
+	require.Equal(t, 0, upstream.calls, "should not call upstream when switching on pre-check")
+}
+
+func TestIsAntigravityAccountSwitchError(t *testing.T) {
+	tests := []struct {
+		name          string
+		err           error
+		expectedOK    bool
+		expectedID    int64
+		expectedModel string
+	}{
+		{
+			name:       "nil error",
+			err:        nil,
+			expectedOK: false,
+		},
+		{
+			name:       "generic error",
+			err:        fmt.Errorf("some error"),
+			expectedOK: false,
+		},
+		{
+			name: "account switch error",
+			err: &AntigravityAccountSwitchError{
+				OriginalAccountID: 123,
+				RateLimitedModel:  "claude-sonnet-4-5",
+				IsStickySession:   true,
+			},
+			expectedOK:    true,
+			expectedID:    123,
+			expectedModel: "claude-sonnet-4-5",
+		},
+		{
+			name: "wrapped account switch error",
+			err: fmt.Errorf("wrapped: %w", &AntigravityAccountSwitchError{
+				OriginalAccountID: 456,
+				RateLimitedModel:  "gemini-3-flash",
+				IsStickySession:   false,
+			}),
+			expectedOK:    true,
+			expectedID:    456,
+			expectedModel: "gemini-3-flash",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			switchErr, ok := IsAntigravityAccountSwitchError(tt.err)
+			require.Equal(t, tt.expectedOK, ok)
+			if tt.expectedOK {
+				require.NotNil(t, switchErr)
+				require.Equal(t, tt.expectedID, switchErr.OriginalAccountID)
+				require.Equal(t, tt.expectedModel, switchErr.RateLimitedModel)
+			} else {
+				require.Nil(t, switchErr)
+			}
+		})
+	}
+}
+
+func TestAntigravityAccountSwitchError_Error(t *testing.T) {
+	err := &AntigravityAccountSwitchError{
+		OriginalAccountID: 789,
+		RateLimitedModel:  "claude-opus-4-5",
+		IsStickySession:   true,
+	}
+	msg := err.Error()
+	require.Contains(t, msg, "789")
+	require.Contains(t, msg, "claude-opus-4-5")
+}
+
+// stubSchedulerCache is a SchedulerCache implementation used in tests.
+type stubSchedulerCache struct {
+	SchedulerCache
+	setAccountCalls []*Account
+	setAccountErr   error
+}
+
+func (s *stubSchedulerCache) SetAccount(ctx context.Context, account *Account) error {
+	s.setAccountCalls = append(s.setAccountCalls, account) // record the account even when an error is configured
+	return s.setAccountErr
+}
+
+// TestUpdateAccountModelRateLimitInCache_UpdatesExtraAndCallsCache tests that the cache is updated after a model rate limit.
+func TestUpdateAccountModelRateLimitInCache_UpdatesExtraAndCallsCache(t *testing.T) {
+	cache := &stubSchedulerCache{}
+	snapshotService := &SchedulerSnapshotService{cache: cache}
+	svc := &AntigravityGatewayService{
+		schedulerSnapshot: snapshotService,
+	}
+
+	account := &Account{
+		ID:       100,
+		Name:     "test-account",
+		Platform: PlatformAntigravity,
+	}
+	modelKey := "claude-sonnet-4-5"
+	resetAt := time.Now().Add(30 * time.Second)
+
+	svc.updateAccountModelRateLimitInCache(context.Background(), account, modelKey, resetAt)
+
+	// Verify that the Extra field was updated correctly.
+	require.NotNil(t, account.Extra)
+	limits, ok := account.Extra["model_rate_limits"].(map[string]any)
+	require.True(t, ok)
+	modelLimit, ok := limits[modelKey].(map[string]any)
+	require.True(t, ok)
+	require.NotEmpty(t, modelLimit["rate_limited_at"])
+	require.NotEmpty(t, modelLimit["rate_limit_reset_at"])
+
+	// Verify that cache.SetAccount was called.
+	require.Len(t, cache.setAccountCalls, 1)
+	require.Equal(t, account.ID, cache.setAccountCalls[0].ID)
+}
+
+// TestUpdateAccountModelRateLimitInCache_NilSchedulerSnapshot tests that a nil schedulerSnapshot does not cause a panic.
+func TestUpdateAccountModelRateLimitInCache_NilSchedulerSnapshot(t *testing.T) {
+	svc := &AntigravityGatewayService{
+		schedulerSnapshot: nil,
+	}
+
+	account := &Account{ID: 1, Name: "test"}
+
+	// Must not panic.
+	svc.updateAccountModelRateLimitInCache(context.Background(), account, "claude-sonnet-4-5", time.Now().Add(30*time.Second))
+
+	// Extra must not be updated (the function is expected to return early).
+	require.Nil(t, account.Extra)
+}
+
+// TestUpdateAccountModelRateLimitInCache_PreservesExistingExtra tests that existing Extra data is preserved.
+func TestUpdateAccountModelRateLimitInCache_PreservesExistingExtra(t *testing.T) {
+	cache := &stubSchedulerCache{}
+	snapshotService := &SchedulerSnapshotService{cache: cache}
+	svc := &AntigravityGatewayService{
+		schedulerSnapshot: snapshotService,
+	}
+
+	account := &Account{
+		ID:       200,
+		Name:     "test-account",
+		Platform: PlatformAntigravity,
+		Extra: map[string]any{
+			"existing_key": "existing_value",
+			"model_rate_limits": map[string]any{
+				"gemini-3-flash": map[string]any{
+					"rate_limited_at":     "2024-01-01T00:00:00Z",
+					"rate_limit_reset_at": "2024-01-01T00:05:00Z",
+				},
+			},
+		},
+	}
+
+	svc.updateAccountModelRateLimitInCache(context.Background(), account, "claude-sonnet-4-5", time.Now().Add(30*time.Second))
+
+	// Verify that the pre-existing data is preserved.
+	require.Equal(t, "existing_value", account.Extra["existing_key"])
+	limits := account.Extra["model_rate_limits"].(map[string]any)
+	require.NotNil(t, limits["gemini-3-flash"])
+	require.NotNil(t, limits["claude-sonnet-4-5"])
+}
+
+// TestSchedulerSnapshotService_UpdateAccountInCache tests the UpdateAccountInCache method.
+func TestSchedulerSnapshotService_UpdateAccountInCache(t *testing.T) {
+	t.Run("calls cache.SetAccount", func(t *testing.T) {
+		cache := &stubSchedulerCache{}
+		svc := &SchedulerSnapshotService{cache: cache}
+
+		account := &Account{ID: 123, Name: "test"}
+		err := svc.UpdateAccountInCache(context.Background(), account)
+
+		require.NoError(t, err)
+		require.Len(t, cache.setAccountCalls, 1)
+		require.Equal(t, int64(123), cache.setAccountCalls[0].ID)
+	})
+
+	t.Run("returns nil when cache is nil", func(t *testing.T) {
+		svc := &SchedulerSnapshotService{cache: nil}
+
+		err := svc.UpdateAccountInCache(context.Background(), &Account{ID: 1})
+
+		require.NoError(t, err)
+	})
+
+	t.Run("returns nil when account is nil", func(t *testing.T) {
+		cache := &stubSchedulerCache{}
+		svc := &SchedulerSnapshotService{cache: cache}
+
+		err := svc.UpdateAccountInCache(context.Background(), nil)
+
+		require.NoError(t, err)
+		require.Empty(t, cache.setAccountCalls)
+	})
+
+	t.Run("propagates cache error", func(t *testing.T) {
+		expectedErr := fmt.Errorf("cache error")
+		cache := &stubSchedulerCache{setAccountErr: expectedErr}
+		svc := &SchedulerSnapshotService{cache: cache}
+
+		err := svc.UpdateAccountInCache(context.Background(), &Account{ID: 1})
+
+		require.ErrorIs(t, err, expectedErr)
+	})
+}