feat: MODEL_CAPACITY_EXHAUSTED 使用固定1s间隔重试60次,不切换账号

MODEL_CAPACITY_EXHAUSTED (503) 表示模型容量不足,所有账号共享同一容量池,
切换账号无意义。改为固定1s间隔重试最多60次,重试耗尽后直接返回上游错误。

- 新增 antigravityModelCapacityRetryMaxAttempts=60 和 antigravityModelCapacityRetryWait=1s
- shouldTriggerAntigravitySmartRetry 新增 isModelCapacityExhausted 返回值
- handleSmartRetry 对 MODEL_CAPACITY_EXHAUSTED 使用独立重试策略
- handleModelRateLimit 对 MODEL_CAPACITY_EXHAUSTED 仅标记 Handled,不设限流
- 重试耗尽后不设置模型限流、不清除粘性会话、不切换账号
This commit is contained in:
Edric Li
2026-02-10 02:03:06 +08:00
parent d6c2921f2b
commit 6114f69cca
3 changed files with 207 additions and 83 deletions

View File

@@ -39,6 +39,12 @@ const (
antigravitySmartRetryMaxAttempts = 1 // 智能重试最大次数(仅重试 1 次,防止重复限流/长期等待) antigravitySmartRetryMaxAttempts = 1 // 智能重试最大次数(仅重试 1 次,防止重复限流/长期等待)
antigravityDefaultRateLimitDuration = 30 * time.Second // 默认限流时间(无 retryDelay 时使用) antigravityDefaultRateLimitDuration = 30 * time.Second // 默认限流时间(无 retryDelay 时使用)
// MODEL_CAPACITY_EXHAUSTED 专用重试参数
// 模型容量不足时,所有账号共享同一容量池,切换账号无意义
// 使用固定 1s 间隔重试,最多重试 60 次
antigravityModelCapacityRetryMaxAttempts = 60
antigravityModelCapacityRetryWait = 1 * time.Second
// Google RPC 状态和类型常量 // Google RPC 状态和类型常量
googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED" googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED"
googleRPCStatusUnavailable = "UNAVAILABLE" googleRPCStatusUnavailable = "UNAVAILABLE"
@@ -144,7 +150,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
} }
// 判断是否触发智能重试 // 判断是否触发智能重试
shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName := shouldTriggerAntigravitySmartRetry(p.account, respBody) shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(p.account, respBody)
// 情况1: retryDelay >= 阈值,限流模型并切换账号 // 情况1: retryDelay >= 阈值,限流模型并切换账号
if shouldRateLimitModel { if shouldRateLimitModel {
@@ -174,14 +180,21 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
} }
} }
// 情况2: retryDelay < 阈值,智能重试(最多 antigravitySmartRetryMaxAttempts 次) // 情况2: retryDelay < 阈值(或 MODEL_CAPACITY_EXHAUSTED),智能重试
if shouldSmartRetry { if shouldSmartRetry {
var lastRetryResp *http.Response var lastRetryResp *http.Response
var lastRetryBody []byte var lastRetryBody []byte
for attempt := 1; attempt <= antigravitySmartRetryMaxAttempts; attempt++ { // MODEL_CAPACITY_EXHAUSTED 使用独立的重试参数(60 次,固定 1s 间隔)
maxAttempts := antigravitySmartRetryMaxAttempts
if isModelCapacityExhausted {
maxAttempts = antigravityModelCapacityRetryMaxAttempts
waitDuration = antigravityModelCapacityRetryWait
}
for attempt := 1; attempt <= maxAttempts; attempt++ {
log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d", log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
p.prefix, resp.StatusCode, attempt, antigravitySmartRetryMaxAttempts, waitDuration, modelName, p.account.ID) p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
select { select {
case <-p.ctx.Done(): case <-p.ctx.Done():
@@ -207,13 +220,13 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency) retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable { if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, antigravitySmartRetryMaxAttempts) log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, maxAttempts)
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp} return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
} }
// 网络错误时,继续重试 // 网络错误时,继续重试
if retryErr != nil || retryResp == nil { if retryErr != nil || retryResp == nil {
log.Printf("%s status=smart_retry_network_error attempt=%d/%d error=%v", p.prefix, attempt, antigravitySmartRetryMaxAttempts, retryErr) log.Printf("%s status=smart_retry_network_error attempt=%d/%d error=%v", p.prefix, attempt, maxAttempts, retryErr)
continue continue
} }
@@ -227,26 +240,43 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
_ = retryResp.Body.Close() _ = retryResp.Body.Close()
} }
// 解析新的重试信息,用于下次重试的等待时间 // 解析新的重试信息,用于下次重试的等待时间(MODEL_CAPACITY_EXHAUSTED 使用固定循环,跳过)
if attempt < antigravitySmartRetryMaxAttempts && lastRetryBody != nil { if !isModelCapacityExhausted && attempt < maxAttempts && lastRetryBody != nil {
newShouldRetry, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody) newShouldRetry, _, newWaitDuration, _, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody)
if newShouldRetry && newWaitDuration > 0 { if newShouldRetry && newWaitDuration > 0 {
waitDuration = newWaitDuration waitDuration = newWaitDuration
} }
} }
} }
// 所有重试都失败,限流当前模型并切换账号 // 所有重试都失败
rateLimitDuration := waitDuration
if rateLimitDuration <= 0 {
rateLimitDuration = antigravityDefaultRateLimitDuration
}
retryBody := lastRetryBody retryBody := lastRetryBody
if retryBody == nil { if retryBody == nil {
retryBody = respBody retryBody = respBody
} }
// MODEL_CAPACITY_EXHAUSTED: 模型容量不足,切换账号无意义
// 直接返回上游错误响应,不设置模型限流,不切换账号
if isModelCapacityExhausted {
log.Printf("%s status=%d smart_retry_exhausted_model_capacity attempts=%d model=%s account=%d body=%s (model capacity exhausted, not switching account)",
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200))
return &smartRetryResult{
action: smartRetryActionBreakWithResp,
resp: &http.Response{
StatusCode: resp.StatusCode,
Header: resp.Header.Clone(),
Body: io.NopCloser(bytes.NewReader(retryBody)),
},
}
}
// RATE_LIMIT_EXCEEDED: 账号级限流,限流当前模型并切换账号
rateLimitDuration := waitDuration
if rateLimitDuration <= 0 {
rateLimitDuration = antigravityDefaultRateLimitDuration
}
log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)", log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)",
p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200)) p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200))
resetAt := time.Now().Add(rateLimitDuration) resetAt := time.Now().Add(rateLimitDuration)
if p.accountRepo != nil && modelName != "" { if p.accountRepo != nil && modelName != "" {
@@ -2053,8 +2083,9 @@ func antigravityFallbackCooldownSeconds() (time.Duration, bool) {
// antigravitySmartRetryInfo 智能重试所需的信息 // antigravitySmartRetryInfo 智能重试所需的信息
type antigravitySmartRetryInfo struct { type antigravitySmartRetryInfo struct {
RetryDelay time.Duration // 重试延迟时间 RetryDelay time.Duration // 重试延迟时间
ModelName string // 限流的模型名称(如 "claude-sonnet-4-5" ModelName string // 限流的模型名称(如 "claude-sonnet-4-5"
IsModelCapacityExhausted bool // 是否为模型容量不足(MODEL_CAPACITY_EXHAUSTED)
} }
// parseAntigravitySmartRetryInfo 解析 Google RPC RetryInfo 和 ErrorInfo 信息 // parseAntigravitySmartRetryInfo 解析 Google RPC RetryInfo 和 ErrorInfo 信息
@@ -2169,31 +2200,40 @@ func parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo {
} }
return &antigravitySmartRetryInfo{ return &antigravitySmartRetryInfo{
RetryDelay: retryDelay, RetryDelay: retryDelay,
ModelName: modelName, ModelName: modelName,
IsModelCapacityExhausted: hasModelCapacityExhausted,
} }
} }
// shouldTriggerAntigravitySmartRetry 判断是否应该触发智能重试 // shouldTriggerAntigravitySmartRetry 判断是否应该触发智能重试
// 返回: // 返回:
// - shouldRetry: 是否应该智能重试retryDelay < antigravityRateLimitThreshold // - shouldRetry: 是否应该智能重试retryDelay < antigravityRateLimitThreshold,或 MODEL_CAPACITY_EXHAUSTED
// - shouldRateLimitModel: 是否应该限流模型retryDelay >= antigravityRateLimitThreshold // - shouldRateLimitModel: 是否应该限流模型并切换账号(仅 RATE_LIMIT_EXCEEDED 且 retryDelay >= 阈值
// - waitDuration: 等待时间智能重试时使用shouldRateLimitModel=true 时为 0 // - waitDuration: 等待时间
// - modelName: 限流的模型名称 // - modelName: 限流的模型名称
func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string) { // - isModelCapacityExhausted: 是否为模型容量不足MODEL_CAPACITY_EXHAUSTED
func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string, isModelCapacityExhausted bool) {
if account.Platform != PlatformAntigravity { if account.Platform != PlatformAntigravity {
return false, false, 0, "" return false, false, 0, "", false
} }
info := parseAntigravitySmartRetryInfo(respBody) info := parseAntigravitySmartRetryInfo(respBody)
if info == nil { if info == nil {
return false, false, 0, "" return false, false, 0, "", false
} }
// MODEL_CAPACITY_EXHAUSTED: 模型容量不足,所有账号共享同一模型容量池
// 切换账号无意义,使用固定 1s 间隔重试
if info.IsModelCapacityExhausted {
return true, false, antigravityModelCapacityRetryWait, info.ModelName, true
}
// RATE_LIMIT_EXCEEDED: 账号级限流
// retryDelay >= 阈值:直接限流模型,不重试 // retryDelay >= 阈值:直接限流模型,不重试
// 注意:如果上游未提供 retryDelayparseAntigravitySmartRetryInfo 已设置为默认 30s // 注意:如果上游未提供 retryDelayparseAntigravitySmartRetryInfo 已设置为默认 30s
if info.RetryDelay >= antigravityRateLimitThreshold { if info.RetryDelay >= antigravityRateLimitThreshold {
return false, true, info.RetryDelay, info.ModelName return false, true, info.RetryDelay, info.ModelName, false
} }
// retryDelay < 阈值:智能重试 // retryDelay < 阈值:智能重试
@@ -2202,7 +2242,7 @@ func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shou
waitDuration = antigravitySmartRetryMinWait waitDuration = antigravitySmartRetryMinWait
} }
return true, false, waitDuration, info.ModelName return true, false, waitDuration, info.ModelName, false
} }
// handleModelRateLimitParams 模型级限流处理参数 // handleModelRateLimitParams 模型级限流处理参数
@@ -2228,8 +2268,9 @@ type handleModelRateLimitResult struct {
// handleModelRateLimit 处理模型级限流(在原有逻辑之前调用) // handleModelRateLimit 处理模型级限流(在原有逻辑之前调用)
// 仅处理 429/503解析模型名和 retryDelay // 仅处理 429/503解析模型名和 retryDelay
// - retryDelay < antigravityRateLimitThreshold: 返回 ShouldRetry=true由调用方等待后重试 // - MODEL_CAPACITY_EXHAUSTED: 返回 Handled=true实际重试由 handleSmartRetry 处理)
// - retryDelay >= antigravityRateLimitThreshold: 设置模型限流 + 清除粘性会话 + 返回 SwitchError // - RATE_LIMIT_EXCEEDED + retryDelay < 阈值: 返回 ShouldRetry=true由调用方等待后重试
// - RATE_LIMIT_EXCEEDED + retryDelay >= 阈值: 设置模型限流 + 清除粘性会话 + 返回 SwitchError
func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimitParams) *handleModelRateLimitResult { func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimitParams) *handleModelRateLimitResult {
if p.statusCode != 429 && p.statusCode != 503 { if p.statusCode != 429 && p.statusCode != 503 {
return &handleModelRateLimitResult{Handled: false} return &handleModelRateLimitResult{Handled: false}
@@ -2240,7 +2281,17 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit
return &handleModelRateLimitResult{Handled: false} return &handleModelRateLimitResult{Handled: false}
} }
// < antigravityRateLimitThreshold: 等待后重试 // MODEL_CAPACITY_EXHAUSTED模型容量不足所有账号共享同一容量池
// 切换账号无意义,不设置模型限流(实际重试由 handleSmartRetry 处理)
if info.IsModelCapacityExhausted {
log.Printf("%s status=%d model_capacity_exhausted model=%s (not switching account, retry handled by smart retry)",
p.prefix, p.statusCode, info.ModelName)
return &handleModelRateLimitResult{
Handled: true,
}
}
// RATE_LIMIT_EXCEEDED: < antigravityRateLimitThreshold: 等待后重试
if info.RetryDelay < antigravityRateLimitThreshold { if info.RetryDelay < antigravityRateLimitThreshold {
log.Printf("%s status=%d model_rate_limit_wait model=%s wait=%v", log.Printf("%s status=%d model_rate_limit_wait model=%s wait=%v",
p.prefix, p.statusCode, info.ModelName, info.RetryDelay) p.prefix, p.statusCode, info.ModelName, info.RetryDelay)
@@ -2251,7 +2302,7 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit
} }
} }
// >= antigravityRateLimitThreshold: 设置限流 + 清除粘性会话 + 切换账号 // RATE_LIMIT_EXCEEDED: >= antigravityRateLimitThreshold: 设置限流 + 清除粘性会话 + 切换账号
s.setModelRateLimitAndClearSession(p, info) s.setModelRateLimitAndClearSession(p, info)
return &handleModelRateLimitResult{ return &handleModelRateLimitResult{

View File

@@ -188,13 +188,14 @@ func TestHandleUpstreamError_429_NonModelRateLimit(t *testing.T) {
require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey) require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey)
} }
// TestHandleUpstreamError_503_ModelRateLimit 测试 503 模型限流场景 // TestHandleUpstreamError_503_ModelCapacityExhausted 测试 503 模型容量不足场景
func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) { // MODEL_CAPACITY_EXHAUSTED 时应等待重试,不切换账号
func TestHandleUpstreamError_503_ModelCapacityExhausted(t *testing.T) {
repo := &stubAntigravityAccountRepo{} repo := &stubAntigravityAccountRepo{}
svc := &AntigravityGatewayService{accountRepo: repo} svc := &AntigravityGatewayService{accountRepo: repo}
account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity} account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity}
// 503 + MODEL_CAPACITY_EXHAUSTED → 模型限流 // 503 + MODEL_CAPACITY_EXHAUSTED → 等待重试,不切换账号
body := []byte(`{ body := []byte(`{
"error": { "error": {
"status": "UNAVAILABLE", "status": "UNAVAILABLE",
@@ -207,13 +208,13 @@ func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) {
result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false) result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false)
// 应该触发模型限流 // MODEL_CAPACITY_EXHAUSTED 应该标记为已处理,不切换账号,不设置模型限流
// 实际重试由 handleSmartRetry 处理
require.NotNil(t, result) require.NotNil(t, result)
require.True(t, result.Handled) require.True(t, result.Handled)
require.NotNil(t, result.SwitchError) require.False(t, result.ShouldRetry, "MODEL_CAPACITY_EXHAUSTED should not trigger retry from handleModelRateLimit path")
require.Equal(t, "gemini-3-pro-high", result.SwitchError.RateLimitedModel) require.Nil(t, result.SwitchError, "MODEL_CAPACITY_EXHAUSTED should not trigger account switch")
require.Len(t, repo.modelRateLimitCalls, 1) require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit")
require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey)
} }
// TestHandleUpstreamError_503_NonModelRateLimit 测试 503 非模型限流场景(不处理) // TestHandleUpstreamError_503_NonModelRateLimit 测试 503 非模型限流场景(不处理)
@@ -301,11 +302,12 @@ func TestParseGeminiRateLimitResetTime_QuotaResetDelay_RoundsUp(t *testing.T) {
func TestParseAntigravitySmartRetryInfo(t *testing.T) { func TestParseAntigravitySmartRetryInfo(t *testing.T) {
tests := []struct { tests := []struct {
name string name string
body string body string
expectedDelay time.Duration expectedDelay time.Duration
expectedModel string expectedModel string
expectedNil bool expectedNil bool
expectedIsModelCapacityExhausted bool
}{ }{
{ {
name: "valid complete response with RATE_LIMIT_EXCEEDED", name: "valid complete response with RATE_LIMIT_EXCEEDED",
@@ -368,8 +370,9 @@ func TestParseAntigravitySmartRetryInfo(t *testing.T) {
"message": "No capacity available for model gemini-3-pro-high on the server" "message": "No capacity available for model gemini-3-pro-high on the server"
} }
}`, }`,
expectedDelay: 39 * time.Second, expectedDelay: 39 * time.Second,
expectedModel: "gemini-3-pro-high", expectedModel: "gemini-3-pro-high",
expectedIsModelCapacityExhausted: true,
}, },
{ {
name: "503 UNAVAILABLE without MODEL_CAPACITY_EXHAUSTED - should return nil", name: "503 UNAVAILABLE without MODEL_CAPACITY_EXHAUSTED - should return nil",
@@ -480,6 +483,9 @@ func TestParseAntigravitySmartRetryInfo(t *testing.T) {
if result.ModelName != tt.expectedModel { if result.ModelName != tt.expectedModel {
t.Errorf("ModelName = %q, want %q", result.ModelName, tt.expectedModel) t.Errorf("ModelName = %q, want %q", result.ModelName, tt.expectedModel)
} }
if result.IsModelCapacityExhausted != tt.expectedIsModelCapacityExhausted {
t.Errorf("IsModelCapacityExhausted = %v, want %v", result.IsModelCapacityExhausted, tt.expectedIsModelCapacityExhausted)
}
}) })
} }
} }
@@ -491,13 +497,14 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
apiKeyAccount := &Account{Type: AccountTypeAPIKey} apiKeyAccount := &Account{Type: AccountTypeAPIKey}
tests := []struct { tests := []struct {
name string name string
account *Account account *Account
body string body string
expectedShouldRetry bool expectedShouldRetry bool
expectedShouldRateLimit bool expectedShouldRateLimit bool
minWait time.Duration expectedIsModelCapacityExhausted bool
modelName string minWait time.Duration
modelName string
}{ }{
{ {
name: "OAuth account with short delay (< 7s) - smart retry", name: "OAuth account with short delay (< 7s) - smart retry",
@@ -611,13 +618,14 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
] ]
} }
}`, }`,
expectedShouldRetry: false, expectedShouldRetry: true,
expectedShouldRateLimit: true, expectedShouldRateLimit: false,
minWait: 39 * time.Second, expectedIsModelCapacityExhausted: true,
modelName: "gemini-3-pro-high", minWait: 1 * time.Second,
modelName: "gemini-3-pro-high",
}, },
{ {
name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use default rate limit", name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use fixed wait",
account: oauthAccount, account: oauthAccount,
body: `{ body: `{
"error": { "error": {
@@ -629,10 +637,11 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
"message": "No capacity available for model gemini-2.5-flash on the server" "message": "No capacity available for model gemini-2.5-flash on the server"
} }
}`, }`,
expectedShouldRetry: false, expectedShouldRetry: true,
expectedShouldRateLimit: true, expectedShouldRateLimit: false,
minWait: 30 * time.Second, expectedIsModelCapacityExhausted: true,
modelName: "gemini-2.5-flash", minWait: 1 * time.Second,
modelName: "gemini-2.5-flash",
}, },
{ {
name: "429 RESOURCE_EXHAUSTED with RATE_LIMIT_EXCEEDED - no retryDelay - use default rate limit", name: "429 RESOURCE_EXHAUSTED with RATE_LIMIT_EXCEEDED - no retryDelay - use default rate limit",
@@ -656,13 +665,16 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
shouldRetry, shouldRateLimit, wait, model := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body)) shouldRetry, shouldRateLimit, wait, model, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body))
if shouldRetry != tt.expectedShouldRetry { if shouldRetry != tt.expectedShouldRetry {
t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry) t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry)
} }
if shouldRateLimit != tt.expectedShouldRateLimit { if shouldRateLimit != tt.expectedShouldRateLimit {
t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit) t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit)
} }
if isModelCapacityExhausted != tt.expectedIsModelCapacityExhausted {
t.Errorf("isModelCapacityExhausted = %v, want %v", isModelCapacityExhausted, tt.expectedIsModelCapacityExhausted)
}
if shouldRetry { if shouldRetry {
if wait < tt.minWait { if wait < tt.minWait {
t.Errorf("wait = %v, want >= %v", wait, tt.minWait) t.Errorf("wait = %v, want >= %v", wait, tt.minWait)

View File

@@ -294,8 +294,9 @@ func TestHandleSmartRetry_ShortDelay_SmartRetryFailed_ReturnsSwitchError(t *test
require.Len(t, upstream.calls, 1, "should have made one retry call (max attempts)") require.Len(t, upstream.calls, 1, "should have made one retry call (max attempts)")
} }
// TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError 测试 503 MODEL_CAPACITY_EXHAUSTED 返回 switchError // TestHandleSmartRetry_503_ModelCapacityExhausted_RetrySuccess 测试 503 MODEL_CAPACITY_EXHAUSTED 重试成功
func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testing.T) { // MODEL_CAPACITY_EXHAUSTED 使用固定 1s 间隔重试,不切换账号
func TestHandleSmartRetry_503_ModelCapacityExhausted_RetrySuccess(t *testing.T) {
repo := &stubAntigravityAccountRepo{} repo := &stubAntigravityAccountRepo{}
account := &Account{ account := &Account{
ID: 3, ID: 3,
@@ -304,7 +305,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
Platform: PlatformAntigravity, Platform: PlatformAntigravity,
} }
// 503 + MODEL_CAPACITY_EXHAUSTED + 39s >= 7s 阈值 // 503 + MODEL_CAPACITY_EXHAUSTED + 39s(上游 retryDelay 应被忽略,使用固定 1s
respBody := []byte(`{ respBody := []byte(`{
"error": { "error": {
"code": 503, "code": 503,
@@ -322,6 +323,14 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
Body: io.NopCloser(bytes.NewReader(respBody)), Body: io.NopCloser(bytes.NewReader(respBody)),
} }
// mock: 第 1 次重试返回 200 成功
upstream := &mockSmartRetryUpstream{
responses: []*http.Response{
{StatusCode: http.StatusOK, Header: http.Header{}, Body: io.NopCloser(strings.NewReader(`{"ok":true}`))},
},
errors: []error{nil},
}
params := antigravityRetryLoopParams{ params := antigravityRetryLoopParams{
ctx: context.Background(), ctx: context.Background(),
prefix: "[test]", prefix: "[test]",
@@ -330,6 +339,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
action: "generateContent", action: "generateContent",
body: []byte(`{"input":"test"}`), body: []byte(`{"input":"test"}`),
accountRepo: repo, accountRepo: repo,
httpUpstream: upstream,
isStickySession: true, isStickySession: true,
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult { handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
return nil return nil
@@ -343,16 +353,67 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
require.NotNil(t, result) require.NotNil(t, result)
require.Equal(t, smartRetryActionBreakWithResp, result.action) require.Equal(t, smartRetryActionBreakWithResp, result.action)
require.Nil(t, result.resp) require.NotNil(t, result.resp, "should return successful response")
require.Equal(t, http.StatusOK, result.resp.StatusCode)
require.Nil(t, result.err) require.Nil(t, result.err)
require.NotNil(t, result.switchError, "should return switchError for 503 model capacity exhausted") require.Nil(t, result.switchError, "MODEL_CAPACITY_EXHAUSTED should not return switchError")
require.Equal(t, account.ID, result.switchError.OriginalAccountID)
require.Equal(t, "gemini-3-pro-high", result.switchError.RateLimitedModel)
require.True(t, result.switchError.IsStickySession)
// 验证模型限流已设置 // 不应设置模型限流
require.Len(t, repo.modelRateLimitCalls, 1) require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit")
require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey) require.Len(t, upstream.calls, 1, "should have made one retry call before success")
}
// TestHandleSmartRetry_503_ModelCapacityExhausted_ContextCancel 测试 MODEL_CAPACITY_EXHAUSTED 上下文取消
func TestHandleSmartRetry_503_ModelCapacityExhausted_ContextCancel(t *testing.T) {
repo := &stubAntigravityAccountRepo{}
account := &Account{
ID: 3,
Name: "acc-3",
Type: AccountTypeOAuth,
Platform: PlatformAntigravity,
}
respBody := []byte(`{
"error": {
"code": 503,
"status": "UNAVAILABLE",
"details": [
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
]
}
}`)
resp := &http.Response{
StatusCode: http.StatusServiceUnavailable,
Header: http.Header{},
Body: io.NopCloser(bytes.NewReader(respBody)),
}
// 立即取消上下文,验证重试循环能正确退出
ctx, cancel := context.WithCancel(context.Background())
cancel()
params := antigravityRetryLoopParams{
ctx: ctx,
prefix: "[test]",
account: account,
accessToken: "token",
action: "generateContent",
body: []byte(`{"input":"test"}`),
accountRepo: repo,
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
return nil
},
}
svc := &AntigravityGatewayService{}
result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, []string{"https://ag-1.test"})
require.NotNil(t, result)
require.Equal(t, smartRetryActionBreakWithResp, result.action)
require.Error(t, result.err, "should return context error")
require.Nil(t, result.switchError, "should not return switchError on context cancel")
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit on context cancel")
} }
// TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic 测试非 Antigravity 平台账号走默认逻辑 // TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic 测试非 Antigravity 平台账号走默认逻辑
@@ -1129,20 +1190,20 @@ func TestHandleSmartRetry_ShortDelay_NetworkError_StickySession_ClearsSession(t
} }
// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession // TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession
// 503 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定 // 429 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定
func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession(t *testing.T) { func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession(t *testing.T) {
failRespBody := `{ failRespBody := `{
"error": { "error": {
"code": 503, "code": 429,
"status": "UNAVAILABLE", "status": "RESOURCE_EXHAUSTED",
"details": [ "details": [
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"},
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"} {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
] ]
} }
}` }`
failResp := &http.Response{ failResp := &http.Response{
StatusCode: http.StatusServiceUnavailable, StatusCode: http.StatusTooManyRequests,
Header: http.Header{}, Header: http.Header{},
Body: io.NopCloser(strings.NewReader(failRespBody)), Body: io.NopCloser(strings.NewReader(failRespBody)),
} }
@@ -1162,16 +1223,16 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession
respBody := []byte(`{ respBody := []byte(`{
"error": { "error": {
"code": 503, "code": 429,
"status": "UNAVAILABLE", "status": "RESOURCE_EXHAUSTED",
"details": [ "details": [
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"}, {"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"},
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"} {"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
] ]
} }
}`) }`)
resp := &http.Response{ resp := &http.Response{
StatusCode: http.StatusServiceUnavailable, StatusCode: http.StatusTooManyRequests,
Header: http.Header{}, Header: http.Header{},
Body: io.NopCloser(bytes.NewReader(respBody)), Body: io.NopCloser(bytes.NewReader(respBody)),
} }