refactor: replace scope-level rate limiting with model-level rate limiting

Merge functional changes from develop branch: - Remove AntigravityQuotaScope system (claude/gemini_text/gemini_image) - Replace with per-model rate limiting using resolveAntigravityModelKey - Remove model load statistics (IncrModelCallCount/GetModelLoadBatch) - Simplify account selection to unified priority→load→LRU algorithm - Remove SetAntigravityQuotaScopeLimit from AccountRepository - Clean up scope-related UI indicators and API fields
2026-02-09 08:19:01 +08:00
parent 1af06aed96
commit fc095bf054
23 changed files with 137 additions and 1162 deletions
--- a/backend/internal/service/account_service.go
+++ b/backend/internal/service/account_service.go
@@ -50,7 +50,6 @@ type AccountRepository interface {
 	ListSchedulableByGroupIDAndPlatforms(ctx context.Context, groupID int64, platforms []string) ([]Account, error)

 	SetRateLimited(ctx context.Context, id int64, resetAt time.Time) error
-	SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope AntigravityQuotaScope, resetAt time.Time) error
 	SetModelRateLimit(ctx context.Context, id int64, scope string, resetAt time.Time) error
 	SetOverloaded(ctx context.Context, id int64, until time.Time) error
 	SetTempUnschedulable(ctx context.Context, id int64, until time.Time, reason string) error
--- a/backend/internal/service/account_service_delete_test.go
+++ b/backend/internal/service/account_service_delete_test.go
@@ -143,10 +143,6 @@ func (s *accountRepoStub) SetRateLimited(ctx context.Context, id int64, resetAt
 	panic("unexpected SetRateLimited call")
 }

-func (s *accountRepoStub) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope AntigravityQuotaScope, resetAt time.Time) error {
-	panic("unexpected SetAntigravityQuotaScopeLimit call")
-}
-
 func (s *accountRepoStub) SetModelRateLimit(ctx context.Context, id int64, scope string, resetAt time.Time) error {
 	panic("unexpected SetModelRateLimit call")
 }
--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -101,12 +101,11 @@ type antigravityRetryLoopParams struct {
 	accessToken     string
 	action          string
 	body            []byte
-	quotaScope      AntigravityQuotaScope
 	c               *gin.Context
 	httpUpstream    HTTPUpstream
 	settingService  *SettingService
 	accountRepo     AccountRepository // 用于智能重试的模型级别限流
-	handleError     func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult
+	handleError     func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult
 	requestedModel  string // 用于限流检查的原始请求模型
 	isStickySession bool   // 是否为粘性会话（用于账号切换时的缓存计费判断）
 	groupID         int64  // 用于模型级限流时清除粘性会话
@@ -158,8 +157,8 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam

 		resetAt := time.Now().Add(rateLimitDuration)
 		if !setModelRateLimitByModelName(p.ctx, p.accountRepo, p.account.ID, modelName, p.prefix, resp.StatusCode, resetAt, false) {
-			p.handleError(p.ctx, p.prefix, p.account, resp.StatusCode, resp.Header, respBody, p.quotaScope, p.groupID, p.sessionHash, p.isStickySession)
-			log.Printf("%s status=%d rate_limited account=%d (no scope mapping)", p.prefix, resp.StatusCode, p.account.ID)
+			p.handleError(p.ctx, p.prefix, p.account, resp.StatusCode, resp.Header, respBody, p.requestedModel, p.groupID, p.sessionHash, p.isStickySession)
+			log.Printf("%s status=%d rate_limited account=%d (no model mapping)", p.prefix, resp.StatusCode, p.account.ID)
 		} else {
 			s.updateAccountModelRateLimitInCache(p.ctx, p.account, modelName, resetAt)
 		}
@@ -195,7 +194,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
 			retryReq, err := antigravity.NewAPIRequestWithURL(p.ctx, baseURL, p.action, p.accessToken, p.body)
 			if err != nil {
 				log.Printf("%s status=smart_retry_request_build_failed error=%v", p.prefix, err)
-				p.handleError(p.ctx, p.prefix, p.account, resp.StatusCode, resp.Header, respBody, p.quotaScope, p.groupID, p.sessionHash, p.isStickySession)
+				p.handleError(p.ctx, p.prefix, p.account, resp.StatusCode, resp.Header, respBody, p.requestedModel, p.groupID, p.sessionHash, p.isStickySession)
 				return &smartRetryResult{
 					action: smartRetryActionBreakWithResp,
 					resp: &http.Response{
@@ -427,7 +426,7 @@ urlFallbackLoop:
 					}

 					// 重试用尽，标记账户限流
-					p.handleError(p.ctx, p.prefix, p.account, resp.StatusCode, resp.Header, respBody, p.quotaScope, p.groupID, p.sessionHash, p.isStickySession)
+					p.handleError(p.ctx, p.prefix, p.account, resp.StatusCode, resp.Header, respBody, p.requestedModel, p.groupID, p.sessionHash, p.isStickySession)
 					log.Printf("%s status=%d rate_limited base_url=%s body=%s", p.prefix, resp.StatusCode, baseURL, truncateForLog(respBody, 200))
 					resp = &http.Response{
 						StatusCode: resp.StatusCode,
@@ -618,7 +617,7 @@ func (s *AntigravityGatewayService) applyErrorPolicy(p antigravityRetryLoopParam
 		return true, nil
 	case ErrorPolicyMatched:
 		_ = p.handleError(p.ctx, p.prefix, p.account, statusCode, headers, respBody,
-			p.quotaScope, p.groupID, p.sessionHash, p.isStickySession)
+			p.requestedModel, p.groupID, p.sessionHash, p.isStickySession)
 		return true, nil
 	case ErrorPolicyTempUnscheduled:
 		slog.Info("temp_unschedulable_matched",
@@ -1023,6 +1022,7 @@ func isModelNotFoundError(statusCode int, body []byte) bool {
 //	          ├─ 成功 → 正常返回
 //	          └─ 失败 → 设置模型限流 + 清除粘性绑定 → 切换账号
 func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context, account *Account, body []byte, isStickySession bool) (*ForwardResult, error) {
+	// 上游透传账号直接转发，不走 OAuth token 刷新
 	if account.Type == AccountTypeUpstream {
 		return s.ForwardUpstream(ctx, c, account, body)
 	}
@@ -1046,11 +1046,9 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 	if mappedModel == "" {
 		return nil, s.writeClaudeError(c, http.StatusForbidden, "permission_error", fmt.Sprintf("model %s not in whitelist", claudeReq.Model))
 	}
-	loadModel := mappedModel
 	// 应用 thinking 模式自动后缀：如果 thinking 开启且目标是 claude-sonnet-4-5，自动改为 thinking 版本
 	thinkingEnabled := claudeReq.Thinking != nil && claudeReq.Thinking.Type == "enabled"
 	mappedModel = applyThinkingModelSuffix(mappedModel, thinkingEnabled)
-	quotaScope, _ := resolveAntigravityQuotaScope(originalModel)

 	// 获取 access_token
 	if s.tokenProvider == nil {
@@ -1085,11 +1083,6 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 	// 如果客户端请求非流式，在响应处理阶段会收集完整流式响应后转换返回
 	action := "streamGenerateContent"

-	// 统计模型调用次数（包括粘性会话，用于负载均衡调度）
-	if s.cache != nil {
-		_, _ = s.cache.IncrModelCallCount(ctx, account.ID, loadModel)
-	}
-
 	// 执行带重试的请求
 	result, err := s.antigravityRetryLoop(antigravityRetryLoopParams{
 		ctx:             ctx,
@@ -1099,7 +1092,6 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 		accessToken:     accessToken,
 		action:          action,
 		body:            geminiBody,
-		quotaScope:      quotaScope,
 		c:               c,
 		httpUpstream:    s.httpUpstream,
 		settingService:  s.settingService,
@@ -1180,7 +1172,6 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 					accessToken:     accessToken,
 					action:          action,
 					body:            retryGeminiBody,
-					quotaScope:      quotaScope,
 					c:               c,
 					httpUpstream:    s.httpUpstream,
 					settingService:  s.settingService,
@@ -1291,7 +1282,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
 				}
 			}

-			s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, quotaScope, 0, "", isStickySession)
+			s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, originalModel, 0, "", isStickySession)

 			if s.shouldFailoverUpstreamError(resp.StatusCode) {
 				upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(respBody))
@@ -1674,7 +1665,6 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co
 	if len(body) == 0 {
 		return nil, s.writeGoogleError(c, http.StatusBadRequest, "Request body is empty")
 	}
-	quotaScope, _ := resolveAntigravityQuotaScope(originalModel)

 	// 解析请求以获取 image_size（用于图片计费）
 	imageSize := s.extractImageSize(body)
@@ -1744,11 +1734,6 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co
 	// 如果客户端请求非流式，在响应处理阶段会收集完整流式响应后返回
 	upstreamAction := "streamGenerateContent"

-	// 统计模型调用次数（包括粘性会话，用于负载均衡调度）
-	if s.cache != nil {
-		_, _ = s.cache.IncrModelCallCount(ctx, account.ID, mappedModel)
-	}
-
 	// 执行带重试的请求
 	result, err := s.antigravityRetryLoop(antigravityRetryLoopParams{
 		ctx:             ctx,
@@ -1758,7 +1743,6 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co
 		accessToken:     accessToken,
 		action:          upstreamAction,
 		body:            wrappedBody,
-		quotaScope:      quotaScope,
 		c:               c,
 		httpUpstream:    s.httpUpstream,
 		settingService:  s.settingService,
@@ -1832,7 +1816,7 @@ func (s *AntigravityGatewayService) ForwardGemini(ctx context.Context, c *gin.Co
 		if unwrapErr != nil || len(unwrappedForOps) == 0 {
 			unwrappedForOps = respBody
 		}
-		s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, quotaScope, 0, "", isStickySession)
+		s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, originalModel, 0, "", isStickySession)
 		upstreamMsg := strings.TrimSpace(extractAntigravityErrorMessage(unwrappedForOps))
 		upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
 		upstreamDetail := s.getUpstreamErrorDetail(unwrappedForOps)
@@ -2255,10 +2239,10 @@ func (s *AntigravityGatewayService) updateAccountModelRateLimitInCache(ctx conte
 func (s *AntigravityGatewayService) handleUpstreamError(
 	ctx context.Context, prefix string, account *Account,
 	statusCode int, headers http.Header, body []byte,
-	quotaScope AntigravityQuotaScope,
+	requestedModel string,
 	groupID int64, sessionHash string, isStickySession bool,
 ) *handleModelRateLimitResult {
-	// ✨ 模型级限流处理（在原有逻辑之前）
+	// 模型级限流处理（优先）
 	result := s.handleModelRateLimit(&handleModelRateLimitParams{
 		ctx:             ctx,
 		prefix:          prefix,
@@ -2280,27 +2264,35 @@ func (s *AntigravityGatewayService) handleUpstreamError(
 		return nil
 	}

-	// 429 使用 Gemini 格式解析（从 body 解析重置时间）
+	// 429：尝试解析模型级限流，解析失败时兜底为账号级限流
 	if statusCode == 429 {
 		if logBody, maxBytes := s.getLogConfig(); logBody {
 			log.Printf("[Antigravity-Debug] 429 response body: %s", truncateString(string(body), maxBytes))
 		}

-		useScopeLimit := quotaScope != ""
 		resetAt := ParseGeminiRateLimitResetTime(body)
 		defaultDur := s.getDefaultRateLimitDuration()
-		ra := s.resolveResetTime(resetAt, defaultDur)

-		if useScopeLimit {
-			log.Printf("%s status=429 rate_limited scope=%s reset_at=%v reset_in=%v", prefix, quotaScope, ra.Format("15:04:05"), time.Until(ra).Truncate(time.Second))
-			if err := s.accountRepo.SetAntigravityQuotaScopeLimit(ctx, account.ID, quotaScope, ra); err != nil {
-				log.Printf("%s status=429 rate_limit_set_failed scope=%s error=%v", prefix, quotaScope, err)
-			}
-		} else {
-			log.Printf("%s status=429 rate_limited account=%d reset_at=%v reset_in=%v", prefix, account.ID, ra.Format("15:04:05"), time.Until(ra).Truncate(time.Second))
-			if err := s.accountRepo.SetRateLimited(ctx, account.ID, ra); err != nil {
-				log.Printf("%s status=429 rate_limit_set_failed account=%d error=%v", prefix, account.ID, err)
+		// 尝试解析模型 key 并设置模型级限流
+		modelKey := resolveAntigravityModelKey(requestedModel)
+		if modelKey != "" {
+			ra := s.resolveResetTime(resetAt, defaultDur)
+			if err := s.accountRepo.SetModelRateLimit(ctx, account.ID, modelKey, ra); err != nil {
+				log.Printf("%s status=429 model_rate_limit_set_failed model=%s error=%v", prefix, modelKey, err)
+			} else {
+				log.Printf("%s status=429 model_rate_limited model=%s account=%d reset_at=%v reset_in=%v",
+					prefix, modelKey, account.ID, ra.Format("15:04:05"), time.Until(ra).Truncate(time.Second))
+				s.updateAccountModelRateLimitInCache(ctx, account, modelKey, ra)
 			}
+			return nil
+		}
+
+		// 无法解析模型 key，兜底为账号级限流
+		ra := s.resolveResetTime(resetAt, defaultDur)
+		log.Printf("%s status=429 rate_limited account=%d reset_at=%v reset_in=%v (fallback)",
+			prefix, account.ID, ra.Format("15:04:05"), time.Until(ra).Truncate(time.Second))
+		if err := s.accountRepo.SetRateLimited(ctx, account.ID, ra); err != nil {
+			log.Printf("%s status=429 rate_limit_set_failed account=%d error=%v", prefix, account.ID, err)
 		}
 		return nil
 	}
@@ -3533,8 +3525,7 @@ func (s *AntigravityGatewayService) ForwardUpstream(ctx context.Context, c *gin.

 		// 429 错误时标记账号限流
 		if resp.StatusCode == http.StatusTooManyRequests {
-			quotaScope, _ := resolveAntigravityQuotaScope(originalModel)
-			s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, quotaScope, 0, "", false)
+			s.handleUpstreamError(ctx, prefix, account, resp.StatusCode, resp.Header, respBody, originalModel, 0, "", false)
 		}

 		// 透传上游错误
--- a/backend/internal/service/antigravity_quota_scope.go
+++ b/backend/internal/service/antigravity_quota_scope.go
@@ -2,63 +2,23 @@ package service

 import (
 	"context"
-	"slices"
 	"strings"
 	"time"
 )

-const antigravityQuotaScopesKey = "antigravity_quota_scopes"
-
-// AntigravityQuotaScope 表示 Antigravity 的配额域
-type AntigravityQuotaScope string
-
-const (
-	AntigravityQuotaScopeClaude      AntigravityQuotaScope = "claude"
-	AntigravityQuotaScopeGeminiText  AntigravityQuotaScope = "gemini_text"
-	AntigravityQuotaScopeGeminiImage AntigravityQuotaScope = "gemini_image"
-)
-
-// IsScopeSupported 检查给定的 scope 是否在分组支持的 scope 列表中
-func IsScopeSupported(supportedScopes []string, scope AntigravityQuotaScope) bool {
-	if len(supportedScopes) == 0 {
-		// 未配置时默认全部支持
-		return true
-	}
-	supported := slices.Contains(supportedScopes, string(scope))
-	return supported
-}
-
-// ResolveAntigravityQuotaScope 根据模型名称解析配额域（导出版本）
-func ResolveAntigravityQuotaScope(requestedModel string) (AntigravityQuotaScope, bool) {
-	return resolveAntigravityQuotaScope(requestedModel)
-}
-
-// resolveAntigravityQuotaScope 根据模型名称解析配额域
-func resolveAntigravityQuotaScope(requestedModel string) (AntigravityQuotaScope, bool) {
-	model := normalizeAntigravityModelName(requestedModel)
-	if model == "" {
-		return "", false
-	}
-	switch {
-	case strings.HasPrefix(model, "claude-"):
-		return AntigravityQuotaScopeClaude, true
-	case strings.HasPrefix(model, "gemini-"):
-		if isImageGenerationModel(model) {
-			return AntigravityQuotaScopeGeminiImage, true
-		}
-		return AntigravityQuotaScopeGeminiText, true
-	default:
-		return "", false
-	}
-}
-
 func normalizeAntigravityModelName(model string) string {
 	normalized := strings.ToLower(strings.TrimSpace(model))
 	normalized = strings.TrimPrefix(normalized, "models/")
 	return normalized
 }

-// IsSchedulableForModel 结合 Antigravity 配额域限流判断是否可调度。
+// resolveAntigravityModelKey 根据请求的模型名解析限流 key
+// 返回空字符串表示无法解析
+func resolveAntigravityModelKey(requestedModel string) string {
+	return normalizeAntigravityModelName(requestedModel)
+}
+
+// IsSchedulableForModel 结合模型级限流判断是否可调度。
 // 保持旧签名以兼容既有调用方；默认使用 context.Background()。
 func (a *Account) IsSchedulableForModel(requestedModel string) bool {
 	return a.IsSchedulableForModelWithContext(context.Background(), requestedModel)
@@ -74,107 +34,20 @@ func (a *Account) IsSchedulableForModelWithContext(ctx context.Context, requeste
 	if a.isModelRateLimitedWithContext(ctx, requestedModel) {
 		return false
 	}
-	if a.Platform != PlatformAntigravity {
-		return true
-	}
-	scope, ok := resolveAntigravityQuotaScope(requestedModel)
-	if !ok {
-		return true
-	}
-	resetAt := a.antigravityQuotaScopeResetAt(scope)
-	if resetAt == nil {
-		return true
-	}
-	now := time.Now()
-	return !now.Before(*resetAt)
+	return true
 }

-func (a *Account) antigravityQuotaScopeResetAt(scope AntigravityQuotaScope) *time.Time {
-	if a == nil || a.Extra == nil || scope == "" {
-		return nil
-	}
-	rawScopes, ok := a.Extra[antigravityQuotaScopesKey].(map[string]any)
-	if !ok {
-		return nil
-	}
-	rawScope, ok := rawScopes[string(scope)].(map[string]any)
-	if !ok {
-		return nil
-	}
-	resetAtRaw, ok := rawScope["rate_limit_reset_at"].(string)
-	if !ok || strings.TrimSpace(resetAtRaw) == "" {
-		return nil
-	}
-	resetAt, err := time.Parse(time.RFC3339, resetAtRaw)
-	if err != nil {
-		return nil
-	}
-	return &resetAt
-}
-
-var antigravityAllScopes = []AntigravityQuotaScope{
-	AntigravityQuotaScopeClaude,
-	AntigravityQuotaScopeGeminiText,
-	AntigravityQuotaScopeGeminiImage,
-}
-
-func (a *Account) GetAntigravityScopeRateLimits() map[string]int64 {
-	if a == nil || a.Platform != PlatformAntigravity {
-		return nil
-	}
-	now := time.Now()
-	result := make(map[string]int64)
-	for _, scope := range antigravityAllScopes {
-		resetAt := a.antigravityQuotaScopeResetAt(scope)
-		if resetAt != nil && now.Before(*resetAt) {
-			remainingSec := int64(time.Until(*resetAt).Seconds())
-			if remainingSec > 0 {
-				result[string(scope)] = remainingSec
-			}
-		}
-	}
-	if len(result) == 0 {
-		return nil
-	}
-	return result
-}
-
-// GetQuotaScopeRateLimitRemainingTime 获取模型域限流剩余时间
-// 返回 0 表示未限流或已过期
-func (a *Account) GetQuotaScopeRateLimitRemainingTime(requestedModel string) time.Duration {
-	if a == nil || a.Platform != PlatformAntigravity {
-		return 0
-	}
-	scope, ok := resolveAntigravityQuotaScope(requestedModel)
-	if !ok {
-		return 0
-	}
-	resetAt := a.antigravityQuotaScopeResetAt(scope)
-	if resetAt == nil {
-		return 0
-	}
-	if remaining := time.Until(*resetAt); remaining > 0 {
-		return remaining
-	}
-	return 0
-}
-
-// GetRateLimitRemainingTime 获取限流剩余时间（模型限流和模型域限流取最大值）
+// GetRateLimitRemainingTime 获取限流剩余时间（模型级限流）
 // 返回 0 表示未限流或已过期
 func (a *Account) GetRateLimitRemainingTime(requestedModel string) time.Duration {
 	return a.GetRateLimitRemainingTimeWithContext(context.Background(), requestedModel)
 }

-// GetRateLimitRemainingTimeWithContext 获取限流剩余时间（模型限流和模型域限流取最大值）
+// GetRateLimitRemainingTimeWithContext 获取限流剩余时间（模型级限流）
 // 返回 0 表示未限流或已过期
 func (a *Account) GetRateLimitRemainingTimeWithContext(ctx context.Context, requestedModel string) time.Duration {
 	if a == nil {
 		return 0
 	}
-	modelRemaining := a.GetModelRateLimitRemainingTimeWithContext(ctx, requestedModel)
-	scopeRemaining := a.GetQuotaScopeRateLimitRemainingTime(requestedModel)
-	if modelRemaining > scopeRemaining {
-		return modelRemaining
-	}
-	return scopeRemaining
+	return a.GetModelRateLimitRemainingTimeWithContext(ctx, requestedModel)
 }
--- a/backend/internal/service/antigravity_rate_limit_test.go
+++ b/backend/internal/service/antigravity_rate_limit_test.go
@@ -59,12 +59,6 @@ func (s *stubAntigravityUpstream) DoWithTLS(req *http.Request, proxyURL string,
 	return s.Do(req, proxyURL, accountID, accountConcurrency)
 }

-type scopeLimitCall struct {
-	accountID int64
-	scope     AntigravityQuotaScope
-	resetAt   time.Time
-}
-
 type rateLimitCall struct {
 	accountID int64
 	resetAt   time.Time
@@ -78,16 +72,10 @@ type modelRateLimitCall struct {

 type stubAntigravityAccountRepo struct {
 	AccountRepository
-	scopeCalls          []scopeLimitCall
 	rateCalls           []rateLimitCall
 	modelRateLimitCalls []modelRateLimitCall
 }

-func (s *stubAntigravityAccountRepo) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope AntigravityQuotaScope, resetAt time.Time) error {
-	s.scopeCalls = append(s.scopeCalls, scopeLimitCall{accountID: id, scope: scope, resetAt: resetAt})
-	return nil
-}
-
 func (s *stubAntigravityAccountRepo) SetRateLimited(ctx context.Context, id int64, resetAt time.Time) error {
 	s.rateCalls = append(s.rateCalls, rateLimitCall{accountID: id, resetAt: resetAt})
 	return nil
@@ -131,10 +119,9 @@ func TestAntigravityRetryLoop_URLFallback_UsesLatestSuccess(t *testing.T) {
 		accessToken:    "token",
 		action:         "generateContent",
 		body:           []byte(`{"input":"test"}`),
-		quotaScope:     AntigravityQuotaScopeClaude,
 		httpUpstream:   upstream,
 		requestedModel: "claude-sonnet-4-5",
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			handleErrorCalled = true
 			return nil
 		},
@@ -155,23 +142,6 @@ func TestAntigravityRetryLoop_URLFallback_UsesLatestSuccess(t *testing.T) {
 	require.Equal(t, base2, available[0])
 }

-func TestAntigravityHandleUpstreamError_UsesScopeLimit(t *testing.T) {
-	// 分区限流始终开启，不再支持通过环境变量关闭
-	repo := &stubAntigravityAccountRepo{}
-	svc := &AntigravityGatewayService{accountRepo: repo}
-	account := &Account{ID: 9, Name: "acc-9", Platform: PlatformAntigravity}
-
-	body := buildGeminiRateLimitBody("3s")
-	svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, AntigravityQuotaScopeClaude, 0, "", false)
-
-	require.Len(t, repo.scopeCalls, 1)
-	require.Empty(t, repo.rateCalls)
-	call := repo.scopeCalls[0]
-	require.Equal(t, account.ID, call.accountID)
-	require.Equal(t, AntigravityQuotaScopeClaude, call.scope)
-	require.WithinDuration(t, time.Now().Add(3*time.Second), call.resetAt, 2*time.Second)
-}
-
 // TestHandleUpstreamError_429_ModelRateLimit 测试 429 模型限流场景
 func TestHandleUpstreamError_429_ModelRateLimit(t *testing.T) {
 	repo := &stubAntigravityAccountRepo{}
@@ -189,7 +159,7 @@ func TestHandleUpstreamError_429_ModelRateLimit(t *testing.T) {
 		}
 	}`)

-	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, AntigravityQuotaScopeClaude, 0, "", false)
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, "claude-sonnet-4-5", 0, "", false)

 	// 应该触发模型限流
 	require.NotNil(t, result)
@@ -200,22 +170,22 @@ func TestHandleUpstreamError_429_ModelRateLimit(t *testing.T) {
 	require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey)
 }

-// TestHandleUpstreamError_429_NonModelRateLimit 测试 429 非模型限流场景（走 scope 限流）
+// TestHandleUpstreamError_429_NonModelRateLimit 测试 429 非模型限流场景（走模型级限流兜底）
 func TestHandleUpstreamError_429_NonModelRateLimit(t *testing.T) {
 	repo := &stubAntigravityAccountRepo{}
 	svc := &AntigravityGatewayService{accountRepo: repo}
 	account := &Account{ID: 2, Name: "acc-2", Platform: PlatformAntigravity}

-	// 429 + 普通限流响应（无 RATE_LIMIT_EXCEEDED reason）→ scope 限流
+	// 429 + 普通限流响应（无 RATE_LIMIT_EXCEEDED reason）→ 走模型级限流兜底
 	body := buildGeminiRateLimitBody("5s")

-	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, AntigravityQuotaScopeClaude, 0, "", false)
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusTooManyRequests, http.Header{}, body, "claude-sonnet-4-5", 0, "", false)

-	// 不应该触发模型限流，应该走 scope 限流
+	// handleModelRateLimit 不会处理（因为没有 RATE_LIMIT_EXCEEDED），
+	// 但 429 兜底逻辑会使用 requestedModel 设置模型级限流
 	require.Nil(t, result)
-	require.Empty(t, repo.modelRateLimitCalls)
-	require.Len(t, repo.scopeCalls, 1)
-	require.Equal(t, AntigravityQuotaScopeClaude, repo.scopeCalls[0].scope)
+	require.Len(t, repo.modelRateLimitCalls, 1)
+	require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey)
 }

 // TestHandleUpstreamError_503_ModelRateLimit 测试 503 模型限流场景
@@ -235,7 +205,7 @@ func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) {
 		}
 	}`)

-	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, AntigravityQuotaScopeGeminiText, 0, "", false)
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false)

 	// 应该触发模型限流
 	require.NotNil(t, result)
@@ -263,12 +233,11 @@ func TestHandleUpstreamError_503_NonModelRateLimit(t *testing.T) {
 		}
 	}`)

-	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, AntigravityQuotaScopeGeminiText, 0, "", false)
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false)

 	// 503 非模型限流不应该做任何处理
 	require.Nil(t, result)
 	require.Empty(t, repo.modelRateLimitCalls, "503 non-model rate limit should not trigger model rate limit")
-	require.Empty(t, repo.scopeCalls, "503 non-model rate limit should not trigger scope rate limit")
 	require.Empty(t, repo.rateCalls, "503 non-model rate limit should not trigger account rate limit")
 }

@@ -281,12 +250,11 @@ func TestHandleUpstreamError_503_EmptyBody(t *testing.T) {
 	// 503 + 空响应体 → 不做任何处理
 	body := []byte(`{}`)

-	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, AntigravityQuotaScopeGeminiText, 0, "", false)
+	result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false)

 	// 503 空响应不应该做任何处理
 	require.Nil(t, result)
 	require.Empty(t, repo.modelRateLimitCalls)
-	require.Empty(t, repo.scopeCalls)
 	require.Empty(t, repo.rateCalls)
 }

@@ -307,15 +275,7 @@ func TestAccountIsSchedulableForModel_AntigravityRateLimits(t *testing.T) {
 	require.False(t, account.IsSchedulableForModel("gemini-3-flash"))

 	account.RateLimitResetAt = nil
-	account.Extra = map[string]any{
-		antigravityQuotaScopesKey: map[string]any{
-			"claude": map[string]any{
-				"rate_limit_reset_at": future.Format(time.RFC3339),
-			},
-		},
-	}
-
-	require.False(t, account.IsSchedulableForModel("claude-sonnet-4-5"))
+	require.True(t, account.IsSchedulableForModel("claude-sonnet-4-5"))
 	require.True(t, account.IsSchedulableForModel("gemini-3-flash"))
 }

@@ -635,6 +595,7 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
 			}`,
 			expectedShouldRetry:     false,
 			expectedShouldRateLimit: true,
+			minWait:                 7 * time.Second,
 			modelName:               "gemini-pro",
 		},
 		{
@@ -652,6 +613,7 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
 			}`,
 			expectedShouldRetry:     false,
 			expectedShouldRateLimit: true,
+			minWait:                 39 * time.Second,
 			modelName:               "gemini-3-pro-high",
 		},
 		{
@@ -669,6 +631,7 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
 			}`,
 			expectedShouldRetry:     false,
 			expectedShouldRateLimit: true,
+			minWait:                 30 * time.Second,
 			modelName:               "gemini-2.5-flash",
 		},
 		{
@@ -686,6 +649,7 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
 			}`,
 			expectedShouldRetry:     false,
 			expectedShouldRateLimit: true,
+			minWait:                 30 * time.Second,
 			modelName:               "claude-sonnet-4-5",
 		},
 	}
@@ -704,6 +668,11 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
 					t.Errorf("wait = %v, want >= %v", wait, tt.minWait)
 				}
 			}
+			if shouldRateLimit && tt.minWait > 0 {
+				if wait < tt.minWait {
+					t.Errorf("rate limit wait = %v, want >= %v", wait, tt.minWait)
+				}
+			}
 			if (shouldRetry || shouldRateLimit) && model != tt.modelName {
 				t.Errorf("modelName = %q, want %q", model, tt.modelName)
 			}
@@ -832,7 +801,7 @@ func TestAntigravityRetryLoop_PreCheck_SwitchesWhenRateLimited(t *testing.T) {
 		requestedModel:  "claude-sonnet-4-5",
 		httpUpstream:    upstream,
 		isStickySession: true,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	})
@@ -875,7 +844,7 @@ func TestAntigravityRetryLoop_PreCheck_SwitchesWhenRemainingLong(t *testing.T) {
 		requestedModel:  "claude-sonnet-4-5",
 		httpUpstream:    upstream,
 		isStickySession: true,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	})
--- a/backend/internal/service/antigravity_smart_retry_test.go
+++ b/backend/internal/service/antigravity_smart_retry_test.go
@@ -75,7 +75,7 @@ func TestHandleSmartRetry_URLLevelRateLimit(t *testing.T) {
 		accessToken: "token",
 		action:      "generateContent",
 		body:        []byte(`{"input":"test"}`),
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -127,7 +127,7 @@ func TestHandleSmartRetry_LongDelay_ReturnsSwitchError(t *testing.T) {
 		body:            []byte(`{"input":"test"}`),
 		accountRepo:     repo,
 		isStickySession: true,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -194,7 +194,7 @@ func TestHandleSmartRetry_ShortDelay_SmartRetrySuccess(t *testing.T) {
 		action:       "generateContent",
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -269,7 +269,7 @@ func TestHandleSmartRetry_ShortDelay_SmartRetryFailed_ReturnsSwitchError(t *test
 		httpUpstream:    upstream,
 		accountRepo:     repo,
 		isStickySession: false,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -331,7 +331,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
 		body:            []byte(`{"input":"test"}`),
 		accountRepo:     repo,
 		isStickySession: true,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -387,7 +387,7 @@ func TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic(t *testing
 		accessToken: "token",
 		action:      "generateContent",
 		body:        []byte(`{"input":"test"}`),
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -436,7 +436,7 @@ func TestHandleSmartRetry_NonModelRateLimit_ContinuesDefaultLogic(t *testing.T)
 		accessToken: "token",
 		action:      "generateContent",
 		body:        []byte(`{"input":"test"}`),
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -487,7 +487,7 @@ func TestHandleSmartRetry_ExactlyAtThreshold_ReturnsSwitchError(t *testing.T) {
 		action:      "generateContent",
 		body:        []byte(`{"input":"test"}`),
 		accountRepo: repo,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -548,7 +548,7 @@ func TestAntigravityRetryLoop_HandleSmartRetry_SwitchError_Propagates(t *testing
 		httpUpstream:    upstream,
 		accountRepo:     repo,
 		isStickySession: true,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	})
@@ -604,7 +604,7 @@ func TestHandleSmartRetry_NetworkError_ExhaustsRetry(t *testing.T) {
 		body:         []byte(`{"input":"test"}`),
 		httpUpstream: upstream,
 		accountRepo:  repo,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -662,7 +662,7 @@ func TestHandleSmartRetry_NoRetryDelay_UsesDefaultRateLimit(t *testing.T) {
 		body:            []byte(`{"input":"test"}`),
 		accountRepo:     repo,
 		isStickySession: true,
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -754,7 +754,7 @@ func TestHandleSmartRetry_ShortDelay_StickySession_FailedRetry_ClearsSession(t *
 		isStickySession: true,
 		groupID:         42,
 		sessionHash:     "sticky-hash-abc",
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -842,7 +842,7 @@ func TestHandleSmartRetry_ShortDelay_NonStickySession_FailedRetry_NoDeleteSessio
 		isStickySession: false,
 		groupID:         42,
 		sessionHash:     "", // 非粘性会话，sessionHash 为空
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -918,7 +918,7 @@ func TestHandleSmartRetry_ShortDelay_StickySession_FailedRetry_NilCache_NoPanic(
 		isStickySession: true,
 		groupID:         42,
 		sessionHash:     "sticky-hash-nil-cache",
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -983,7 +983,7 @@ func TestHandleSmartRetry_ShortDelay_StickySession_SuccessRetry_NoDeleteSession(
 		isStickySession: true,
 		groupID:         42,
 		sessionHash:     "sticky-hash-success",
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -1043,7 +1043,7 @@ func TestHandleSmartRetry_LongDelay_StickySession_NoDeleteInHandleSmartRetry(t *
 		isStickySession: true,
 		groupID:         42,
 		sessionHash:     "sticky-hash-long-delay",
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -1108,7 +1108,7 @@ func TestHandleSmartRetry_ShortDelay_NetworkError_StickySession_ClearsSession(t
 		isStickySession: true,
 		groupID:         99,
 		sessionHash:     "sticky-net-error",
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -1188,7 +1188,7 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession
 		isStickySession: true,
 		groupID:         77,
 		sessionHash:     "sticky-503-short",
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	}
@@ -1278,7 +1278,7 @@ func TestAntigravityRetryLoop_SmartRetryFailed_StickySession_SwitchErrorPropagat
 		isStickySession: true,
 		groupID:         55,
 		sessionHash:     "sticky-loop-test",
-		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, quotaScope AntigravityQuotaScope, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+		handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
 			return nil
 		},
 	})
@@ -1296,4 +1296,4 @@ func TestAntigravityRetryLoop_SmartRetryFailed_StickySession_SwitchErrorPropagat
 	require.Len(t, cache.deleteCalls, 1, "should clear sticky session in handleSmartRetry")
 	require.Equal(t, int64(55), cache.deleteCalls[0].groupID)
 	require.Equal(t, "sticky-loop-test", cache.deleteCalls[0].sessionHash)
-}
+}
--- a/backend/internal/service/gateway_multiplatform_test.go
+++ b/backend/internal/service/gateway_multiplatform_test.go
@@ -142,9 +142,6 @@ func (m *mockAccountRepoForPlatform) ListSchedulableByGroupIDAndPlatforms(ctx co
 func (m *mockAccountRepoForPlatform) SetRateLimited(ctx context.Context, id int64, resetAt time.Time) error {
 	return nil
 }
-func (m *mockAccountRepoForPlatform) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope AntigravityQuotaScope, resetAt time.Time) error {
-	return nil
-}
 func (m *mockAccountRepoForPlatform) SetModelRateLimit(ctx context.Context, id int64, scope string, resetAt time.Time) error {
 	return nil
 }
@@ -216,14 +213,6 @@ func (m *mockGatewayCacheForPlatform) DeleteSessionAccountID(ctx context.Context
 	return nil
 }

-func (m *mockGatewayCacheForPlatform) IncrModelCallCount(ctx context.Context, accountID int64, model string) (int64, error) {
-	return 0, nil
-}
-
-func (m *mockGatewayCacheForPlatform) GetModelLoadBatch(ctx context.Context, accountIDs []int64, model string) (map[int64]*ModelLoadInfo, error) {
-	return nil, nil
-}
-
 type mockGroupRepoForGateway struct {
 	groups           map[int64]*Group
 	getByIDCalls     int
@@ -282,10 +271,6 @@ func (m *mockGroupRepoForGateway) GetAccountIDsByGroupIDs(ctx context.Context, g
 	return nil, nil
 }

-func (m *mockGroupRepoForGateway) UpdateSortOrders(ctx context.Context, updates []GroupSortOrderUpdate) error {
-	return nil
-}
-
 func ptr[T any](v T) *T {
 	return &v
 }
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -246,9 +246,6 @@ var (
 // ErrClaudeCodeOnly 表示分组仅允许 Claude Code 客户端访问
 var ErrClaudeCodeOnly = errors.New("this group only allows Claude Code clients")

-// ErrModelScopeNotSupported 表示请求的模型系列不在分组支持的范围内
-var ErrModelScopeNotSupported = errors.New("model scope not supported by this group")
-
 // allowedHeaders 白名单headers（参考CRS项目）
 var allowedHeaders = map[string]bool{
 	"accept":                                    true,
@@ -274,13 +271,6 @@ var allowedHeaders = map[string]bool{
 // GatewayCache 定义网关服务的缓存操作接口。
 // 提供粘性会话（Sticky Session）的存储、查询、刷新和删除功能。
 //
-// ModelLoadInfo 模型负载信息（用于 Antigravity 调度）
-// Model load info for Antigravity scheduling
-type ModelLoadInfo struct {
-	CallCount  int64     // 当前分钟调用次数 / Call count in current minute
-	LastUsedAt time.Time // 最后调度时间（零值表示未调度过）/ Last scheduling time (zero means never scheduled)
-}
-
 // GatewayCache defines cache operations for gateway service.
 // Provides sticky session storage, retrieval, refresh and deletion capabilities.
 type GatewayCache interface {
@@ -296,15 +286,6 @@ type GatewayCache interface {
 	// DeleteSessionAccountID 删除粘性会话绑定，用于账号不可用时主动清理
 	// Delete sticky session binding, used to proactively clean up when account becomes unavailable
 	DeleteSessionAccountID(ctx context.Context, groupID int64, sessionHash string) error
-
-	// IncrModelCallCount 增加模型调用次数并更新最后调度时间（Antigravity 专用）
-	// Increment model call count and update last scheduling time (Antigravity only)
-	// 返回更新后的调用次数
-	IncrModelCallCount(ctx context.Context, accountID int64, model string) (int64, error)
-
-	// GetModelLoadBatch 批量获取账号的模型负载信息（Antigravity 专用）
-	// Batch get model load info for accounts (Antigravity only)
-	GetModelLoadBatch(ctx context.Context, accountIDs []int64, model string) (map[int64]*ModelLoadInfo, error)
 }

 // derefGroupID safely dereferences *int64 to int64, returning 0 if nil
@@ -1018,13 +999,6 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 		log.Printf("[ModelRoutingDebug] load-aware enabled: group_id=%v model=%s session=%s platform=%s", derefGroupID(groupID), requestedModel, shortSessionHash(sessionHash), platform)
 	}

-	// Antigravity 模型系列检查（在账号选择前检查，确保所有代码路径都经过此检查）
-	if platform == PlatformAntigravity && groupID != nil && requestedModel != "" {
-		if err := s.checkAntigravityModelScope(ctx, *groupID, requestedModel); err != nil {
-			return nil, err
-		}
-	}
-
 	accounts, useMixed, err := s.listSchedulableAccounts(ctx, groupID, platform, hasForcePlatform)
 	if err != nil {
 		return nil, err
@@ -1372,10 +1346,6 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 			return result, nil
 		}
 	} else {
-		// Antigravity 平台：获取模型负载信息
-		var modelLoadMap map[int64]*ModelLoadInfo
-		isAntigravity := platform == PlatformAntigravity
-
 		var available []accountWithLoad
 		for _, acc := range candidates {
 			loadInfo := loadMap[acc.ID]
@@ -1390,109 +1360,44 @@ func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, gro
 			}
 		}

-		// Antigravity 平台：按账号实际映射后的模型名获取模型负载（与 Forward 的统计保持一致）
-		if isAntigravity && requestedModel != "" && s.cache != nil && len(available) > 0 {
-			modelLoadMap = make(map[int64]*ModelLoadInfo, len(available))
-			modelToAccountIDs := make(map[string][]int64)
-			for _, item := range available {
-				mappedModel := mapAntigravityModel(item.account, requestedModel)
-				if mappedModel == "" {
-					continue
-				}
-				modelToAccountIDs[mappedModel] = append(modelToAccountIDs[mappedModel], item.account.ID)
+		// 分层过滤选择：优先级 → 负载率 → LRU
+		for len(available) > 0 {
+			// 1. 取优先级最小的集合
+			candidates := filterByMinPriority(available)
+			// 2. 取负载率最低的集合
+			candidates = filterByMinLoadRate(candidates)
+			// 3. LRU 选择最久未用的账号
+			selected := selectByLRU(candidates, preferOAuth)
+			if selected == nil {
+				break
 			}
-			for model, ids := range modelToAccountIDs {
-				batch, err := s.cache.GetModelLoadBatch(ctx, ids, model)
-				if err != nil {
-					continue
-				}
-				for id, info := range batch {
-					modelLoadMap[id] = info
-				}
-			}
-			if len(modelLoadMap) == 0 {
-				modelLoadMap = nil
-			}
-		}

-		// Antigravity 平台：优先级硬过滤 →（同优先级内）按调用次数选择（最少优先，新账号用平均值）
-		// 其他平台：分层过滤选择：优先级 → 负载率 → LRU
-		if isAntigravity {
-			for len(available) > 0 {
-				// 1. 取优先级最小的集合（硬过滤）
-				candidates := filterByMinPriority(available)
-				// 2. 同优先级内按调用次数选择（调用次数最少优先，新账号使用平均值）
-				selected := selectByCallCount(candidates, modelLoadMap, preferOAuth)
-				if selected == nil {
-					break
-				}
-
-				result, err := s.tryAcquireAccountSlot(ctx, selected.account.ID, selected.account.Concurrency)
-				if err == nil && result.Acquired {
-					// 会话数量限制检查
-					if !s.checkAndRegisterSession(ctx, selected.account, sessionHash) {
-						result.ReleaseFunc() // 释放槽位，继续尝试下一个账号
-					} else {
-						if sessionHash != "" && s.cache != nil {
-							_ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, selected.account.ID, stickySessionTTL)
-						}
-						return &AccountSelectionResult{
-							Account:     selected.account,
-							Acquired:    true,
-							ReleaseFunc: result.ReleaseFunc,
-						}, nil
+			result, err := s.tryAcquireAccountSlot(ctx, selected.account.ID, selected.account.Concurrency)
+			if err == nil && result.Acquired {
+				// 会话数量限制检查
+				if !s.checkAndRegisterSession(ctx, selected.account, sessionHash) {
+					result.ReleaseFunc() // 释放槽位，继续尝试下一个账号
+				} else {
+					if sessionHash != "" && s.cache != nil {
+						_ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, selected.account.ID, stickySessionTTL)
 					}
+					return &AccountSelectionResult{
+						Account:     selected.account,
+						Acquired:    true,
+						ReleaseFunc: result.ReleaseFunc,
+					}, nil
 				}
-
-				// 移除已尝试的账号，重新选择
-				selectedID := selected.account.ID
-				newAvailable := make([]accountWithLoad, 0, len(available)-1)
-				for _, acc := range available {
-					if acc.account.ID != selectedID {
-						newAvailable = append(newAvailable, acc)
-					}
-				}
-				available = newAvailable
 			}
-		} else {
-			for len(available) > 0 {
-				// 1. 取优先级最小的集合
-				candidates := filterByMinPriority(available)
-				// 2. 取负载率最低的集合
-				candidates = filterByMinLoadRate(candidates)
-				// 3. LRU 选择最久未用的账号
-				selected := selectByLRU(candidates, preferOAuth)
-				if selected == nil {
-					break
-				}

-				result, err := s.tryAcquireAccountSlot(ctx, selected.account.ID, selected.account.Concurrency)
-				if err == nil && result.Acquired {
-					// 会话数量限制检查
-					if !s.checkAndRegisterSession(ctx, selected.account, sessionHash) {
-						result.ReleaseFunc() // 释放槽位，继续尝试下一个账号
-					} else {
-						if sessionHash != "" && s.cache != nil {
-							_ = s.cache.SetSessionAccountID(ctx, derefGroupID(groupID), sessionHash, selected.account.ID, stickySessionTTL)
-						}
-						return &AccountSelectionResult{
-							Account:     selected.account,
-							Acquired:    true,
-							ReleaseFunc: result.ReleaseFunc,
-						}, nil
-					}
+			// 移除已尝试的账号，重新进行分层过滤
+			selectedID := selected.account.ID
+			newAvailable := make([]accountWithLoad, 0, len(available)-1)
+			for _, acc := range available {
+				if acc.account.ID != selectedID {
+					newAvailable = append(newAvailable, acc)
 				}
-
-				// 移除已尝试的账号，重新进行分层过滤
-				selectedID := selected.account.ID
-				newAvailable := make([]accountWithLoad, 0, len(available)-1)
-				for _, acc := range available {
-					if acc.account.ID != selectedID {
-						newAvailable = append(newAvailable, acc)
-					}
-				}
-				available = newAvailable
 			}
+			available = newAvailable
 		}
 	}

@@ -2103,87 +2008,6 @@ func sameLastUsedAt(a, b *time.Time) bool {
 	}
 }

-// selectByCallCount 从候选账号中选择调用次数最少的账号（Antigravity 专用）
-// 新账号（CallCount=0）使用平均调用次数作为虚拟值，避免冷启动被猛调
-// 如果有多个账号具有相同的最小调用次数，则随机选择一个
-func selectByCallCount(accounts []accountWithLoad, modelLoadMap map[int64]*ModelLoadInfo, preferOAuth bool) *accountWithLoad {
-	if len(accounts) == 0 {
-		return nil
-	}
-	if len(accounts) == 1 {
-		return &accounts[0]
-	}
-
-	// 如果没有负载信息，回退到 LRU
-	if modelLoadMap == nil {
-		return selectByLRU(accounts, preferOAuth)
-	}
-
-	// 1. 计算平均调用次数（用于新账号冷启动）
-	var totalCallCount int64
-	var countWithCalls int
-	for _, acc := range accounts {
-		if info := modelLoadMap[acc.account.ID]; info != nil && info.CallCount > 0 {
-			totalCallCount += info.CallCount
-			countWithCalls++
-		}
-	}
-
-	var avgCallCount int64
-	if countWithCalls > 0 {
-		avgCallCount = totalCallCount / int64(countWithCalls)
-	}
-
-	// 2. 获取每个账号的有效调用次数
-	getEffectiveCallCount := func(acc accountWithLoad) int64 {
-		if acc.account == nil {
-			return 0
-		}
-		info := modelLoadMap[acc.account.ID]
-		if info == nil || info.CallCount == 0 {
-			return avgCallCount // 新账号使用平均值
-		}
-		return info.CallCount
-	}
-
-	// 3. 找到最小调用次数
-	minCount := getEffectiveCallCount(accounts[0])
-	for _, acc := range accounts[1:] {
-		if c := getEffectiveCallCount(acc); c < minCount {
-			minCount = c
-		}
-	}
-
-	// 4. 收集所有具有最小调用次数的账号
-	var candidateIdxs []int
-	for i, acc := range accounts {
-		if getEffectiveCallCount(acc) == minCount {
-			candidateIdxs = append(candidateIdxs, i)
-		}
-	}
-
-	// 5. 如果只有一个候选，直接返回
-	if len(candidateIdxs) == 1 {
-		return &accounts[candidateIdxs[0]]
-	}
-
-	// 6. preferOAuth 处理
-	if preferOAuth {
-		var oauthIdxs []int
-		for _, idx := range candidateIdxs {
-			if accounts[idx].account.Type == AccountTypeOAuth {
-				oauthIdxs = append(oauthIdxs, idx)
-			}
-		}
-		if len(oauthIdxs) > 0 {
-			candidateIdxs = oauthIdxs
-		}
-	}
-
-	// 7. 随机选择
-	return &accounts[candidateIdxs[mathrand.Intn(len(candidateIdxs))]]
-}
-
 // sortCandidatesForFallback 根据配置选择排序策略
 // mode: "last_used"(按最后使用时间) 或 "random"(随机)
 func (s *GatewayService) sortCandidatesForFallback(accounts []*Account, preferOAuth bool, mode string) {
@@ -2236,13 +2060,6 @@ func shuffleWithinPriority(accounts []*Account) {

 // selectAccountForModelWithPlatform 选择单平台账户（完全隔离）
 func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, platform string) (*Account, error) {
-	// 对 Antigravity 平台，检查请求的模型系列是否在分组支持范围内
-	if platform == PlatformAntigravity && groupID != nil && requestedModel != "" {
-		if err := s.checkAntigravityModelScope(ctx, *groupID, requestedModel); err != nil {
-			return nil, err
-		}
-	}
-
 	preferOAuth := platform == PlatformGemini
 	routingAccountIDs := s.routingAccountIDsForRequest(ctx, groupID, requestedModel, platform)

@@ -5254,27 +5071,6 @@ func (s *GatewayService) validateUpstreamBaseURL(raw string) (string, error) {
 	return normalized, nil
 }

-// checkAntigravityModelScope 检查 Antigravity 平台的模型系列是否在分组支持范围内
-func (s *GatewayService) checkAntigravityModelScope(ctx context.Context, groupID int64, requestedModel string) error {
-	scope, ok := ResolveAntigravityQuotaScope(requestedModel)
-	if !ok {
-		return nil // 无法解析 scope，跳过检查
-	}
-
-	group, err := s.resolveGroupByID(ctx, groupID)
-	if err != nil {
-		return nil // 查询失败时放行
-	}
-	if group == nil {
-		return nil // 分组不存在时放行
-	}
-
-	if !IsScopeSupported(group.SupportedModelScopes, scope) {
-		return ErrModelScopeNotSupported
-	}
-	return nil
-}
-
 // GetAvailableModels returns the list of models available for a group
 // It aggregates model_mapping keys from all schedulable accounts in the group
 func (s *GatewayService) GetAvailableModels(ctx context.Context, groupID *int64, platform string) []string {
--- a/backend/internal/service/gemini_multiplatform_test.go
+++ b/backend/internal/service/gemini_multiplatform_test.go
@@ -133,9 +133,6 @@ func (m *mockAccountRepoForGemini) ListSchedulableByGroupIDAndPlatforms(ctx cont
 func (m *mockAccountRepoForGemini) SetRateLimited(ctx context.Context, id int64, resetAt time.Time) error {
 	return nil
 }
-func (m *mockAccountRepoForGemini) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope AntigravityQuotaScope, resetAt time.Time) error {
-	return nil
-}
 func (m *mockAccountRepoForGemini) SetModelRateLimit(ctx context.Context, id int64, scope string, resetAt time.Time) error {
 	return nil
 }
@@ -269,14 +266,6 @@ func (m *mockGatewayCacheForGemini) DeleteSessionAccountID(ctx context.Context,
 	return nil
 }

-func (m *mockGatewayCacheForGemini) IncrModelCallCount(ctx context.Context, accountID int64, model string) (int64, error) {
-	return 0, nil
-}
-
-func (m *mockGatewayCacheForGemini) GetModelLoadBatch(ctx context.Context, accountIDs []int64, model string) (map[int64]*ModelLoadInfo, error) {
-	return nil, nil
-}
-
 // TestGeminiMessagesCompatService_SelectAccountForModelWithExclusions_GeminiPlatform 测试 Gemini 单平台选择
 func TestGeminiMessagesCompatService_SelectAccountForModelWithExclusions_GeminiPlatform(t *testing.T) {
 	ctx := context.Background()
--- a/backend/internal/service/model_rate_limit_test.go
+++ b/backend/internal/service/model_rate_limit_test.go
@@ -318,110 +318,6 @@ func TestGetModelRateLimitRemainingTime(t *testing.T) {
 	}
 }

-func TestGetQuotaScopeRateLimitRemainingTime(t *testing.T) {
-	now := time.Now()
-	future10m := now.Add(10 * time.Minute).Format(time.RFC3339)
-	past := now.Add(-10 * time.Minute).Format(time.RFC3339)
-
-	tests := []struct {
-		name           string
-		account        *Account
-		requestedModel string
-		minExpected    time.Duration
-		maxExpected    time.Duration
-	}{
-		{
-			name:           "nil account",
-			account:        nil,
-			requestedModel: "claude-sonnet-4-5",
-			minExpected:    0,
-			maxExpected:    0,
-		},
-		{
-			name: "non-antigravity platform",
-			account: &Account{
-				Platform: PlatformAnthropic,
-				Extra: map[string]any{
-					antigravityQuotaScopesKey: map[string]any{
-						"claude": map[string]any{
-							"rate_limit_reset_at": future10m,
-						},
-					},
-				},
-			},
-			requestedModel: "claude-sonnet-4-5",
-			minExpected:    0,
-			maxExpected:    0,
-		},
-		{
-			name: "claude scope rate limited",
-			account: &Account{
-				Platform: PlatformAntigravity,
-				Extra: map[string]any{
-					antigravityQuotaScopesKey: map[string]any{
-						"claude": map[string]any{
-							"rate_limit_reset_at": future10m,
-						},
-					},
-				},
-			},
-			requestedModel: "claude-sonnet-4-5",
-			minExpected:    9 * time.Minute,
-			maxExpected:    11 * time.Minute,
-		},
-		{
-			name: "gemini_text scope rate limited",
-			account: &Account{
-				Platform: PlatformAntigravity,
-				Extra: map[string]any{
-					antigravityQuotaScopesKey: map[string]any{
-						"gemini_text": map[string]any{
-							"rate_limit_reset_at": future10m,
-						},
-					},
-				},
-			},
-			requestedModel: "gemini-3-flash",
-			minExpected:    9 * time.Minute,
-			maxExpected:    11 * time.Minute,
-		},
-		{
-			name: "expired scope rate limit",
-			account: &Account{
-				Platform: PlatformAntigravity,
-				Extra: map[string]any{
-					antigravityQuotaScopesKey: map[string]any{
-						"claude": map[string]any{
-							"rate_limit_reset_at": past,
-						},
-					},
-				},
-			},
-			requestedModel: "claude-sonnet-4-5",
-			minExpected:    0,
-			maxExpected:    0,
-		},
-		{
-			name: "unsupported model",
-			account: &Account{
-				Platform: PlatformAntigravity,
-			},
-			requestedModel: "gpt-4",
-			minExpected:    0,
-			maxExpected:    0,
-		},
-	}
-
-	for _, tt := range tests {
-		t.Run(tt.name, func(t *testing.T) {
-			result := tt.account.GetQuotaScopeRateLimitRemainingTime(tt.requestedModel)
-			if result < tt.minExpected || result > tt.maxExpected {
-				t.Errorf("GetQuotaScopeRateLimitRemainingTime() = %v, want between %v and %v", result, tt.minExpected, tt.maxExpected)
-			}
-		})
-	}
-}
-
 func TestGetRateLimitRemainingTime(t *testing.T) {
 	now := time.Now()
 	future15m := now.Add(15 * time.Minute).Format(time.RFC3339)
@@ -442,45 +338,19 @@ func TestGetRateLimitRemainingTime(t *testing.T) {
 			maxExpected:    0,
 		},
 		{
-			name: "model remaining > scope remaining - returns model",
+			name: "model rate limited - 15 minutes",
 			account: &Account{
 				Platform: PlatformAntigravity,
 				Extra: map[string]any{
 					modelRateLimitsKey: map[string]any{
 						"claude-sonnet-4-5": map[string]any{
-							"rate_limit_reset_at": future15m, // 15 分钟
-						},
-					},
-					antigravityQuotaScopesKey: map[string]any{
-						"claude": map[string]any{
-							"rate_limit_reset_at": future5m, // 5 分钟
+							"rate_limit_reset_at": future15m,
 						},
 					},
 				},
 			},
 			requestedModel: "claude-sonnet-4-5",
-			minExpected:    14 * time.Minute, // 应返回较大的 15 分钟
-			maxExpected:    16 * time.Minute,
-		},
-		{
-			name: "scope remaining > model remaining - returns scope",
-			account: &Account{
-				Platform: PlatformAntigravity,
-				Extra: map[string]any{
-					modelRateLimitsKey: map[string]any{
-						"claude-sonnet-4-5": map[string]any{
-							"rate_limit_reset_at": future5m, // 5 分钟
-						},
-					},
-					antigravityQuotaScopesKey: map[string]any{
-						"claude": map[string]any{
-							"rate_limit_reset_at": future15m, // 15 分钟
-						},
-					},
-				},
-			},
-			requestedModel: "claude-sonnet-4-5",
-			minExpected:    14 * time.Minute, // 应返回较大的 15 分钟
+			minExpected:    14 * time.Minute,
 			maxExpected:    16 * time.Minute,
 		},
 		{
@@ -499,22 +369,6 @@ func TestGetRateLimitRemainingTime(t *testing.T) {
 			minExpected:    4 * time.Minute,
 			maxExpected:    6 * time.Minute,
 		},
-		{
-			name: "only scope rate limited",
-			account: &Account{
-				Platform: PlatformAntigravity,
-				Extra: map[string]any{
-					antigravityQuotaScopesKey: map[string]any{
-						"claude": map[string]any{
-							"rate_limit_reset_at": future5m,
-						},
-					},
-				},
-			},
-			requestedModel: "claude-sonnet-4-5",
-			minExpected:    4 * time.Minute,
-			maxExpected:    6 * time.Minute,
-		},
 		{
 			name: "neither rate limited",
 			account: &Account{
--- a/backend/internal/service/openai_gateway_service_test.go
+++ b/backend/internal/service/openai_gateway_service_test.go
@@ -204,14 +204,6 @@ func (c *stubGatewayCache) DeleteSessionAccountID(ctx context.Context, groupID i
 	return nil
 }

-func (c *stubGatewayCache) IncrModelCallCount(ctx context.Context, accountID int64, model string) (int64, error) {
-	return 0, nil
-}
-
-func (c *stubGatewayCache) GetModelLoadBatch(ctx context.Context, accountIDs []int64, model string) (map[int64]*ModelLoadInfo, error) {
-	return nil, nil
-}
-
 func TestOpenAISelectAccountWithLoadAwareness_FiltersUnschedulable(t *testing.T) {
 	now := time.Now()
 	resetAt := now.Add(10 * time.Minute)
--- a/backend/internal/service/ops_account_availability.go
+++ b/backend/internal/service/ops_account_availability.go
@@ -66,7 +66,6 @@ func (s *OpsService) GetAccountAvailabilityStats(ctx context.Context, platformFi
 		}

 		isAvailable := acc.Status == StatusActive && acc.Schedulable && !isRateLimited && !isOverloaded && !isTempUnsched
-		scopeRateLimits := acc.GetAntigravityScopeRateLimits()

 		if acc.Platform != "" {
 			if _, ok := platform[acc.Platform]; !ok {
@@ -85,14 +84,6 @@ func (s *OpsService) GetAccountAvailabilityStats(ctx context.Context, platformFi
 			if hasError {
 				p.ErrorCount++
 			}
-			if len(scopeRateLimits) > 0 {
-				if p.ScopeRateLimitCount == nil {
-					p.ScopeRateLimitCount = make(map[string]int64)
-				}
-				for scope := range scopeRateLimits {
-					p.ScopeRateLimitCount[scope]++
-				}
-			}
 		}

 		for _, grp := range acc.Groups {
@@ -117,14 +108,6 @@ func (s *OpsService) GetAccountAvailabilityStats(ctx context.Context, platformFi
 			if hasError {
 				g.ErrorCount++
 			}
-			if len(scopeRateLimits) > 0 {
-				if g.ScopeRateLimitCount == nil {
-					g.ScopeRateLimitCount = make(map[string]int64)
-				}
-				for scope := range scopeRateLimits {
-					g.ScopeRateLimitCount[scope]++
-				}
-			}
 		}

 		displayGroupID := int64(0)
@@ -157,9 +140,6 @@ func (s *OpsService) GetAccountAvailabilityStats(ctx context.Context, platformFi
 				item.RateLimitRemainingSec = &remainingSec
 			}
 		}
-		if len(scopeRateLimits) > 0 {
-			item.ScopeRateLimits = scopeRateLimits
-		}
 		if isOverloaded && acc.OverloadUntil != nil {
 			item.OverloadUntil = acc.OverloadUntil
 			remainingSec := int64(time.Until(*acc.OverloadUntil).Seconds())
--- a/backend/internal/service/ops_realtime_models.go
+++ b/backend/internal/service/ops_realtime_models.go
@@ -50,24 +50,22 @@ type UserConcurrencyInfo struct {

 // PlatformAvailability aggregates account availability by platform.
 type PlatformAvailability struct {
-	Platform            string           `json:"platform"`
-	TotalAccounts       int64            `json:"total_accounts"`
-	AvailableCount      int64            `json:"available_count"`
-	RateLimitCount      int64            `json:"rate_limit_count"`
-	ScopeRateLimitCount map[string]int64 `json:"scope_rate_limit_count,omitempty"`
-	ErrorCount          int64            `json:"error_count"`
+	Platform       string `json:"platform"`
+	TotalAccounts  int64  `json:"total_accounts"`
+	AvailableCount int64  `json:"available_count"`
+	RateLimitCount int64  `json:"rate_limit_count"`
+	ErrorCount     int64  `json:"error_count"`
 }

 // GroupAvailability aggregates account availability by group.
 type GroupAvailability struct {
-	GroupID             int64            `json:"group_id"`
-	GroupName           string           `json:"group_name"`
-	Platform            string           `json:"platform"`
-	TotalAccounts       int64            `json:"total_accounts"`
-	AvailableCount      int64            `json:"available_count"`
-	RateLimitCount      int64            `json:"rate_limit_count"`
-	ScopeRateLimitCount map[string]int64 `json:"scope_rate_limit_count,omitempty"`
-	ErrorCount          int64            `json:"error_count"`
+	GroupID        int64  `json:"group_id"`
+	GroupName      string `json:"group_name"`
+	Platform       string `json:"platform"`
+	TotalAccounts  int64  `json:"total_accounts"`
+	AvailableCount int64  `json:"available_count"`
+	RateLimitCount int64  `json:"rate_limit_count"`
+	ErrorCount     int64  `json:"error_count"`
 }

 // AccountAvailability represents current availability for a single account.
@@ -85,10 +83,9 @@ type AccountAvailability struct {
 	IsOverloaded  bool `json:"is_overloaded"`
 	HasError      bool `json:"has_error"`

-	RateLimitResetAt       *time.Time       `json:"rate_limit_reset_at"`
-	RateLimitRemainingSec  *int64           `json:"rate_limit_remaining_sec"`
-	ScopeRateLimits        map[string]int64 `json:"scope_rate_limits,omitempty"`
-	OverloadUntil          *time.Time       `json:"overload_until"`
+	RateLimitResetAt       *time.Time `json:"rate_limit_reset_at"`
+	RateLimitRemainingSec  *int64     `json:"rate_limit_remaining_sec"`
+	OverloadUntil          *time.Time `json:"overload_until"`
 	OverloadRemainingSec   *int64           `json:"overload_remaining_sec"`
 	ErrorMessage           string           `json:"error_message"`
 	TempUnschedulableUntil *time.Time       `json:"temp_unschedulable_until,omitempty"`