From 6114f69cca038fb7a897386fa2aa745f8dd96ab0 Mon Sep 17 00:00:00 2001 From: Edric Li Date: Tue, 10 Feb 2026 02:03:06 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20MODEL=5FCAPACITY=5FEXHAUSTED=20?= =?UTF-8?q?=E4=BD=BF=E7=94=A8=E5=9B=BA=E5=AE=9A1s=E9=97=B4=E9=9A=94?= =?UTF-8?q?=E9=87=8D=E8=AF=9560=E6=AC=A1=EF=BC=8C=E4=B8=8D=E5=88=87?= =?UTF-8?q?=E6=8D=A2=E8=B4=A6=E5=8F=B7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MODEL_CAPACITY_EXHAUSTED (503) 表示模型容量不足,所有账号共享同一容量池, 切换账号无意义。改为固定1s间隔重试最多60次,重试耗尽后直接返回上游错误。 - 新增 antigravityModelCapacityRetryMaxAttempts=60 和 antigravityModelCapacityRetryWait=1s - shouldTriggerAntigravitySmartRetry 新增 isModelCapacityExhausted 返回值 - handleSmartRetry 对 MODEL_CAPACITY_EXHAUSTED 使用独立重试策略 - handleModelRateLimit 对 MODEL_CAPACITY_EXHAUSTED 仅标记 Handled,不设限流 - 重试耗尽后不设置模型限流、不清除粘性会话、不切换账号 --- .../service/antigravity_gateway_service.go | 113 +++++++++++++----- .../service/antigravity_rate_limit_test.go | 76 +++++++----- .../service/antigravity_smart_retry_test.go | 101 ++++++++++++---- 3 files changed, 207 insertions(+), 83 deletions(-) diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 9c2b9027..54b6d383 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -39,6 +39,12 @@ const ( antigravitySmartRetryMaxAttempts = 1 // 智能重试最大次数(仅重试 1 次,防止重复限流/长期等待) antigravityDefaultRateLimitDuration = 30 * time.Second // 默认限流时间(无 retryDelay 时使用) + // MODEL_CAPACITY_EXHAUSTED 专用重试参数 + // 模型容量不足时,所有账号共享同一容量池,切换账号无意义 + // 使用固定 1s 间隔重试,最多重试 60 次 + antigravityModelCapacityRetryMaxAttempts = 60 + antigravityModelCapacityRetryWait = 1 * time.Second + // Google RPC 状态和类型常量 googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED" googleRPCStatusUnavailable = "UNAVAILABLE" @@ -144,7 +150,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam } // 判断是否触发智能重试 - shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName := shouldTriggerAntigravitySmartRetry(p.account, respBody) + shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(p.account, respBody) // 情况1: retryDelay >= 阈值,限流模型并切换账号 if shouldRateLimitModel { @@ -174,14 +180,21 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam } } - // 情况2: retryDelay < 阈值,智能重试(最多 antigravitySmartRetryMaxAttempts 次) + // 情况2: retryDelay < 阈值(或 MODEL_CAPACITY_EXHAUSTED),智能重试 if shouldSmartRetry { var lastRetryResp *http.Response var lastRetryBody []byte - for attempt := 1; attempt <= antigravitySmartRetryMaxAttempts; attempt++ { + // MODEL_CAPACITY_EXHAUSTED 使用独立的重试参数(60 次,固定 1s 间隔) + maxAttempts := antigravitySmartRetryMaxAttempts + if isModelCapacityExhausted { + maxAttempts = antigravityModelCapacityRetryMaxAttempts + waitDuration = antigravityModelCapacityRetryWait + } + + for attempt := 1; attempt <= maxAttempts; attempt++ { log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d", - p.prefix, resp.StatusCode, attempt, antigravitySmartRetryMaxAttempts, waitDuration, modelName, p.account.ID) + p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID) select { case <-p.ctx.Done(): @@ -207,13 +220,13 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { - log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, antigravitySmartRetryMaxAttempts) + log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, maxAttempts) return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} } // 网络错误时,继续重试 if retryErr != nil || retryResp == nil { - log.Printf("%s status=smart_retry_network_error attempt=%d/%d error=%v", p.prefix, attempt, antigravitySmartRetryMaxAttempts, retryErr) + log.Printf("%s status=smart_retry_network_error attempt=%d/%d error=%v", p.prefix, attempt, maxAttempts, retryErr) continue } @@ -227,26 +240,43 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam _ = retryResp.Body.Close() } - // 解析新的重试信息,用于下次重试的等待时间 - if attempt < antigravitySmartRetryMaxAttempts && lastRetryBody != nil { - newShouldRetry, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) + // 解析新的重试信息,用于下次重试的等待时间(MODEL_CAPACITY_EXHAUSTED 使用固定循环,跳过) + if !isModelCapacityExhausted && attempt < maxAttempts && lastRetryBody != nil { + newShouldRetry, _, newWaitDuration, _, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) if newShouldRetry && newWaitDuration > 0 { waitDuration = newWaitDuration } } } - // 所有重试都失败,限流当前模型并切换账号 - rateLimitDuration := waitDuration - if rateLimitDuration <= 0 { - rateLimitDuration = antigravityDefaultRateLimitDuration - } + // 所有重试都失败 retryBody := lastRetryBody if retryBody == nil { retryBody = respBody } + + // MODEL_CAPACITY_EXHAUSTED:模型容量不足,切换账号无意义 + // 直接返回上游错误响应,不设置模型限流,不切换账号 + if isModelCapacityExhausted { + log.Printf("%s status=%d smart_retry_exhausted_model_capacity attempts=%d model=%s account=%d body=%s (model capacity exhausted, not switching account)", + p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200)) + return &smartRetryResult{ + action: smartRetryActionBreakWithResp, + resp: &http.Response{ + StatusCode: resp.StatusCode, + Header: resp.Header.Clone(), + Body: io.NopCloser(bytes.NewReader(retryBody)), + }, + } + } + + // RATE_LIMIT_EXCEEDED:账号级限流,限流当前模型并切换账号 + rateLimitDuration := waitDuration + if rateLimitDuration <= 0 { + rateLimitDuration = antigravityDefaultRateLimitDuration + } log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)", - p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200)) + p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200)) resetAt := time.Now().Add(rateLimitDuration) if p.accountRepo != nil && modelName != "" { @@ -2053,8 +2083,9 @@ func antigravityFallbackCooldownSeconds() (time.Duration, bool) { // antigravitySmartRetryInfo 智能重试所需的信息 type antigravitySmartRetryInfo struct { - RetryDelay time.Duration // 重试延迟时间 - ModelName string // 限流的模型名称(如 "claude-sonnet-4-5") + RetryDelay time.Duration // 重试延迟时间 + ModelName string // 限流的模型名称(如 "claude-sonnet-4-5") + IsModelCapacityExhausted bool // 是否为模型容量不足(MODEL_CAPACITY_EXHAUSTED) } // parseAntigravitySmartRetryInfo 解析 Google RPC RetryInfo 和 ErrorInfo 信息 @@ -2169,31 +2200,40 @@ func parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo { } return &antigravitySmartRetryInfo{ - RetryDelay: retryDelay, - ModelName: modelName, + RetryDelay: retryDelay, + ModelName: modelName, + IsModelCapacityExhausted: hasModelCapacityExhausted, } } // shouldTriggerAntigravitySmartRetry 判断是否应该触发智能重试 // 返回: -// - shouldRetry: 是否应该智能重试(retryDelay < antigravityRateLimitThreshold) -// - shouldRateLimitModel: 是否应该限流模型(retryDelay >= antigravityRateLimitThreshold) -// - waitDuration: 等待时间(智能重试时使用,shouldRateLimitModel=true 时为 0) +// - shouldRetry: 是否应该智能重试(retryDelay < antigravityRateLimitThreshold,或 MODEL_CAPACITY_EXHAUSTED) +// - shouldRateLimitModel: 是否应该限流模型并切换账号(仅 RATE_LIMIT_EXCEEDED 且 retryDelay >= 阈值) +// - waitDuration: 等待时间 // - modelName: 限流的模型名称 -func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string) { +// - isModelCapacityExhausted: 是否为模型容量不足(MODEL_CAPACITY_EXHAUSTED) +func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string, isModelCapacityExhausted bool) { if account.Platform != PlatformAntigravity { - return false, false, 0, "" + return false, false, 0, "", false } info := parseAntigravitySmartRetryInfo(respBody) if info == nil { - return false, false, 0, "" + return false, false, 0, "", false } + // MODEL_CAPACITY_EXHAUSTED(模型容量不足):所有账号共享同一模型容量池 + // 切换账号无意义,使用固定 1s 间隔重试 + if info.IsModelCapacityExhausted { + return true, false, antigravityModelCapacityRetryWait, info.ModelName, true + } + + // RATE_LIMIT_EXCEEDED(账号级限流): // retryDelay >= 阈值:直接限流模型,不重试 // 注意:如果上游未提供 retryDelay,parseAntigravitySmartRetryInfo 已设置为默认 30s if info.RetryDelay >= antigravityRateLimitThreshold { - return false, true, info.RetryDelay, info.ModelName + return false, true, info.RetryDelay, info.ModelName, false } // retryDelay < 阈值:智能重试 @@ -2202,7 +2242,7 @@ func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shou waitDuration = antigravitySmartRetryMinWait } - return true, false, waitDuration, info.ModelName + return true, false, waitDuration, info.ModelName, false } // handleModelRateLimitParams 模型级限流处理参数 @@ -2228,8 +2268,9 @@ type handleModelRateLimitResult struct { // handleModelRateLimit 处理模型级限流(在原有逻辑之前调用) // 仅处理 429/503,解析模型名和 retryDelay -// - retryDelay < antigravityRateLimitThreshold: 返回 ShouldRetry=true,由调用方等待后重试 -// - retryDelay >= antigravityRateLimitThreshold: 设置模型限流 + 清除粘性会话 + 返回 SwitchError +// - MODEL_CAPACITY_EXHAUSTED: 返回 Handled=true(实际重试由 handleSmartRetry 处理) +// - RATE_LIMIT_EXCEEDED + retryDelay < 阈值: 返回 ShouldRetry=true,由调用方等待后重试 +// - RATE_LIMIT_EXCEEDED + retryDelay >= 阈值: 设置模型限流 + 清除粘性会话 + 返回 SwitchError func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimitParams) *handleModelRateLimitResult { if p.statusCode != 429 && p.statusCode != 503 { return &handleModelRateLimitResult{Handled: false} @@ -2240,7 +2281,17 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit return &handleModelRateLimitResult{Handled: false} } - // < antigravityRateLimitThreshold: 等待后重试 + // MODEL_CAPACITY_EXHAUSTED:模型容量不足,所有账号共享同一容量池 + // 切换账号无意义,不设置模型限流(实际重试由 handleSmartRetry 处理) + if info.IsModelCapacityExhausted { + log.Printf("%s status=%d model_capacity_exhausted model=%s (not switching account, retry handled by smart retry)", + p.prefix, p.statusCode, info.ModelName) + return &handleModelRateLimitResult{ + Handled: true, + } + } + + // RATE_LIMIT_EXCEEDED: < antigravityRateLimitThreshold: 等待后重试 if info.RetryDelay < antigravityRateLimitThreshold { log.Printf("%s status=%d model_rate_limit_wait model=%s wait=%v", p.prefix, p.statusCode, info.ModelName, info.RetryDelay) @@ -2251,7 +2302,7 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit } } - // >= antigravityRateLimitThreshold: 设置限流 + 清除粘性会话 + 切换账号 + // RATE_LIMIT_EXCEEDED: >= antigravityRateLimitThreshold: 设置限流 + 清除粘性会话 + 切换账号 s.setModelRateLimitAndClearSession(p, info) return &handleModelRateLimitResult{ diff --git a/backend/internal/service/antigravity_rate_limit_test.go b/backend/internal/service/antigravity_rate_limit_test.go index 59cc9331..7175d578 100644 --- a/backend/internal/service/antigravity_rate_limit_test.go +++ b/backend/internal/service/antigravity_rate_limit_test.go @@ -188,13 +188,14 @@ func TestHandleUpstreamError_429_NonModelRateLimit(t *testing.T) { require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey) } -// TestHandleUpstreamError_503_ModelRateLimit 测试 503 模型限流场景 -func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) { +// TestHandleUpstreamError_503_ModelCapacityExhausted 测试 503 模型容量不足场景 +// MODEL_CAPACITY_EXHAUSTED 时应等待重试,不切换账号 +func TestHandleUpstreamError_503_ModelCapacityExhausted(t *testing.T) { repo := &stubAntigravityAccountRepo{} svc := &AntigravityGatewayService{accountRepo: repo} account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity} - // 503 + MODEL_CAPACITY_EXHAUSTED → 模型限流 + // 503 + MODEL_CAPACITY_EXHAUSTED → 等待重试,不切换账号 body := []byte(`{ "error": { "status": "UNAVAILABLE", @@ -207,13 +208,13 @@ func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) { result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false) - // 应该触发模型限流 + // MODEL_CAPACITY_EXHAUSTED 应该标记为已处理,不切换账号,不设置模型限流 + // 实际重试由 handleSmartRetry 处理 require.NotNil(t, result) require.True(t, result.Handled) - require.NotNil(t, result.SwitchError) - require.Equal(t, "gemini-3-pro-high", result.SwitchError.RateLimitedModel) - require.Len(t, repo.modelRateLimitCalls, 1) - require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey) + require.False(t, result.ShouldRetry, "MODEL_CAPACITY_EXHAUSTED should not trigger retry from handleModelRateLimit path") + require.Nil(t, result.SwitchError, "MODEL_CAPACITY_EXHAUSTED should not trigger account switch") + require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit") } // TestHandleUpstreamError_503_NonModelRateLimit 测试 503 非模型限流场景(不处理) @@ -301,11 +302,12 @@ func TestParseGeminiRateLimitResetTime_QuotaResetDelay_RoundsUp(t *testing.T) { func TestParseAntigravitySmartRetryInfo(t *testing.T) { tests := []struct { - name string - body string - expectedDelay time.Duration - expectedModel string - expectedNil bool + name string + body string + expectedDelay time.Duration + expectedModel string + expectedNil bool + expectedIsModelCapacityExhausted bool }{ { name: "valid complete response with RATE_LIMIT_EXCEEDED", @@ -368,8 +370,9 @@ func TestParseAntigravitySmartRetryInfo(t *testing.T) { "message": "No capacity available for model gemini-3-pro-high on the server" } }`, - expectedDelay: 39 * time.Second, - expectedModel: "gemini-3-pro-high", + expectedDelay: 39 * time.Second, + expectedModel: "gemini-3-pro-high", + expectedIsModelCapacityExhausted: true, }, { name: "503 UNAVAILABLE without MODEL_CAPACITY_EXHAUSTED - should return nil", @@ -480,6 +483,9 @@ func TestParseAntigravitySmartRetryInfo(t *testing.T) { if result.ModelName != tt.expectedModel { t.Errorf("ModelName = %q, want %q", result.ModelName, tt.expectedModel) } + if result.IsModelCapacityExhausted != tt.expectedIsModelCapacityExhausted { + t.Errorf("IsModelCapacityExhausted = %v, want %v", result.IsModelCapacityExhausted, tt.expectedIsModelCapacityExhausted) + } }) } } @@ -491,13 +497,14 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { apiKeyAccount := &Account{Type: AccountTypeAPIKey} tests := []struct { - name string - account *Account - body string - expectedShouldRetry bool - expectedShouldRateLimit bool - minWait time.Duration - modelName string + name string + account *Account + body string + expectedShouldRetry bool + expectedShouldRateLimit bool + expectedIsModelCapacityExhausted bool + minWait time.Duration + modelName string }{ { name: "OAuth account with short delay (< 7s) - smart retry", @@ -611,13 +618,14 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { ] } }`, - expectedShouldRetry: false, - expectedShouldRateLimit: true, - minWait: 39 * time.Second, - modelName: "gemini-3-pro-high", + expectedShouldRetry: true, + expectedShouldRateLimit: false, + expectedIsModelCapacityExhausted: true, + minWait: 1 * time.Second, + modelName: "gemini-3-pro-high", }, { - name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use default rate limit", + name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use fixed wait", account: oauthAccount, body: `{ "error": { @@ -629,10 +637,11 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { "message": "No capacity available for model gemini-2.5-flash on the server" } }`, - expectedShouldRetry: false, - expectedShouldRateLimit: true, - minWait: 30 * time.Second, - modelName: "gemini-2.5-flash", + expectedShouldRetry: true, + expectedShouldRateLimit: false, + expectedIsModelCapacityExhausted: true, + minWait: 1 * time.Second, + modelName: "gemini-2.5-flash", }, { name: "429 RESOURCE_EXHAUSTED with RATE_LIMIT_EXCEEDED - no retryDelay - use default rate limit", @@ -656,13 +665,16 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - shouldRetry, shouldRateLimit, wait, model := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body)) + shouldRetry, shouldRateLimit, wait, model, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body)) if shouldRetry != tt.expectedShouldRetry { t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry) } if shouldRateLimit != tt.expectedShouldRateLimit { t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit) } + if isModelCapacityExhausted != tt.expectedIsModelCapacityExhausted { + t.Errorf("isModelCapacityExhausted = %v, want %v", isModelCapacityExhausted, tt.expectedIsModelCapacityExhausted) + } if shouldRetry { if wait < tt.minWait { t.Errorf("wait = %v, want >= %v", wait, tt.minWait) diff --git a/backend/internal/service/antigravity_smart_retry_test.go b/backend/internal/service/antigravity_smart_retry_test.go index a7e0d296..432c80e5 100644 --- a/backend/internal/service/antigravity_smart_retry_test.go +++ b/backend/internal/service/antigravity_smart_retry_test.go @@ -294,8 +294,9 @@ func TestHandleSmartRetry_ShortDelay_SmartRetryFailed_ReturnsSwitchError(t *test require.Len(t, upstream.calls, 1, "should have made one retry call (max attempts)") } -// TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError 测试 503 MODEL_CAPACITY_EXHAUSTED 返回 switchError -func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testing.T) { +// TestHandleSmartRetry_503_ModelCapacityExhausted_RetrySuccess 测试 503 MODEL_CAPACITY_EXHAUSTED 重试成功 +// MODEL_CAPACITY_EXHAUSTED 使用固定 1s 间隔重试,不切换账号 +func TestHandleSmartRetry_503_ModelCapacityExhausted_RetrySuccess(t *testing.T) { repo := &stubAntigravityAccountRepo{} account := &Account{ ID: 3, @@ -304,7 +305,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi Platform: PlatformAntigravity, } - // 503 + MODEL_CAPACITY_EXHAUSTED + 39s >= 7s 阈值 + // 503 + MODEL_CAPACITY_EXHAUSTED + 39s(上游 retryDelay 应被忽略,使用固定 1s) respBody := []byte(`{ "error": { "code": 503, @@ -322,6 +323,14 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi Body: io.NopCloser(bytes.NewReader(respBody)), } + // mock: 第 1 次重试返回 200 成功 + upstream := &mockSmartRetryUpstream{ + responses: []*http.Response{ + {StatusCode: http.StatusOK, Header: http.Header{}, Body: io.NopCloser(strings.NewReader(`{"ok":true}`))}, + }, + errors: []error{nil}, + } + params := antigravityRetryLoopParams{ ctx: context.Background(), prefix: "[test]", @@ -330,6 +339,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi action: "generateContent", body: []byte(`{"input":"test"}`), accountRepo: repo, + httpUpstream: upstream, isStickySession: true, handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { return nil @@ -343,16 +353,67 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi require.NotNil(t, result) require.Equal(t, smartRetryActionBreakWithResp, result.action) - require.Nil(t, result.resp) + require.NotNil(t, result.resp, "should return successful response") + require.Equal(t, http.StatusOK, result.resp.StatusCode) require.Nil(t, result.err) - require.NotNil(t, result.switchError, "should return switchError for 503 model capacity exhausted") - require.Equal(t, account.ID, result.switchError.OriginalAccountID) - require.Equal(t, "gemini-3-pro-high", result.switchError.RateLimitedModel) - require.True(t, result.switchError.IsStickySession) + require.Nil(t, result.switchError, "MODEL_CAPACITY_EXHAUSTED should not return switchError") - // 验证模型限流已设置 - require.Len(t, repo.modelRateLimitCalls, 1) - require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey) + // 不应设置模型限流 + require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit") + require.Len(t, upstream.calls, 1, "should have made one retry call before success") +} + +// TestHandleSmartRetry_503_ModelCapacityExhausted_ContextCancel 测试 MODEL_CAPACITY_EXHAUSTED 上下文取消 +func TestHandleSmartRetry_503_ModelCapacityExhausted_ContextCancel(t *testing.T) { + repo := &stubAntigravityAccountRepo{} + account := &Account{ + ID: 3, + Name: "acc-3", + Type: AccountTypeOAuth, + Platform: PlatformAntigravity, + } + + respBody := []byte(`{ + "error": { + "code": 503, + "status": "UNAVAILABLE", + "details": [ + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"} + ] + } + }`) + resp := &http.Response{ + StatusCode: http.StatusServiceUnavailable, + Header: http.Header{}, + Body: io.NopCloser(bytes.NewReader(respBody)), + } + + // 立即取消上下文,验证重试循环能正确退出 + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + params := antigravityRetryLoopParams{ + ctx: ctx, + prefix: "[test]", + account: account, + accessToken: "token", + action: "generateContent", + body: []byte(`{"input":"test"}`), + accountRepo: repo, + handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { + return nil + }, + } + + svc := &AntigravityGatewayService{} + result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, []string{"https://ag-1.test"}) + + require.NotNil(t, result) + require.Equal(t, smartRetryActionBreakWithResp, result.action) + require.Error(t, result.err, "should return context error") + require.Nil(t, result.switchError, "should not return switchError on context cancel") + require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit on context cancel") } // TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic 测试非 Antigravity 平台账号走默认逻辑 @@ -1129,20 +1190,20 @@ func TestHandleSmartRetry_ShortDelay_NetworkError_StickySession_ClearsSession(t } // TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession -// 503 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定 +// 429 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定 func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession(t *testing.T) { failRespBody := `{ "error": { - "code": 503, - "status": "UNAVAILABLE", + "code": 429, + "status": "RESOURCE_EXHAUSTED", "details": [ - {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"}, {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"} ] } }` failResp := &http.Response{ - StatusCode: http.StatusServiceUnavailable, + StatusCode: http.StatusTooManyRequests, Header: http.Header{}, Body: io.NopCloser(strings.NewReader(failRespBody)), } @@ -1162,16 +1223,16 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession respBody := []byte(`{ "error": { - "code": 503, - "status": "UNAVAILABLE", + "code": 429, + "status": "RESOURCE_EXHAUSTED", "details": [ - {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, + {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"}, {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"} ] } }`) resp := &http.Response{ - StatusCode: http.StatusServiceUnavailable, + StatusCode: http.StatusTooManyRequests, Header: http.Header{}, Body: io.NopCloser(bytes.NewReader(respBody)), }