feat: MODEL_CAPACITY_EXHAUSTED 使用固定1s间隔重试60次,不切换账号
MODEL_CAPACITY_EXHAUSTED (503) 表示模型容量不足,所有账号共享同一容量池,切换账号无意义。改为固定1s间隔重试最多60次,重试耗尽后直接返回上游错误。

- 新增 antigravityModelCapacityRetryMaxAttempts=60 和 antigravityModelCapacityRetryWait=1s
- shouldTriggerAntigravitySmartRetry 新增 isModelCapacityExhausted 返回值
- handleSmartRetry 对 MODEL_CAPACITY_EXHAUSTED 使用独立重试策略
- handleModelRateLimit 对 MODEL_CAPACITY_EXHAUSTED 仅标记 Handled,不设限流
- 重试耗尽后不设置模型限流、不清除粘性会话、不切换账号
This commit is contained in:
@@ -39,6 +39,12 @@ const (
|
|||||||
antigravitySmartRetryMaxAttempts = 1 // 智能重试最大次数(仅重试 1 次,防止重复限流/长期等待)
|
antigravitySmartRetryMaxAttempts = 1 // 智能重试最大次数(仅重试 1 次,防止重复限流/长期等待)
|
||||||
antigravityDefaultRateLimitDuration = 30 * time.Second // 默认限流时间(无 retryDelay 时使用)
|
antigravityDefaultRateLimitDuration = 30 * time.Second // 默认限流时间(无 retryDelay 时使用)
|
||||||
|
|
||||||
|
// MODEL_CAPACITY_EXHAUSTED 专用重试参数
|
||||||
|
// 模型容量不足时,所有账号共享同一容量池,切换账号无意义
|
||||||
|
// 使用固定 1s 间隔重试,最多重试 60 次
|
||||||
|
antigravityModelCapacityRetryMaxAttempts = 60
|
||||||
|
antigravityModelCapacityRetryWait = 1 * time.Second
|
||||||
|
|
||||||
// Google RPC 状态和类型常量
|
// Google RPC 状态和类型常量
|
||||||
googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED"
|
googleRPCStatusResourceExhausted = "RESOURCE_EXHAUSTED"
|
||||||
googleRPCStatusUnavailable = "UNAVAILABLE"
|
googleRPCStatusUnavailable = "UNAVAILABLE"
|
||||||
@@ -144,7 +150,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
|||||||
}
|
}
|
||||||
|
|
||||||
// 判断是否触发智能重试
|
// 判断是否触发智能重试
|
||||||
shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName := shouldTriggerAntigravitySmartRetry(p.account, respBody)
|
shouldSmartRetry, shouldRateLimitModel, waitDuration, modelName, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(p.account, respBody)
|
||||||
|
|
||||||
// 情况1: retryDelay >= 阈值,限流模型并切换账号
|
// 情况1: retryDelay >= 阈值,限流模型并切换账号
|
||||||
if shouldRateLimitModel {
|
if shouldRateLimitModel {
|
||||||
@@ -174,14 +180,21 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 情况2: retryDelay < 阈值,智能重试(最多 antigravitySmartRetryMaxAttempts 次)
|
// 情况2: retryDelay < 阈值(或 MODEL_CAPACITY_EXHAUSTED),智能重试
|
||||||
if shouldSmartRetry {
|
if shouldSmartRetry {
|
||||||
var lastRetryResp *http.Response
|
var lastRetryResp *http.Response
|
||||||
var lastRetryBody []byte
|
var lastRetryBody []byte
|
||||||
|
|
||||||
for attempt := 1; attempt <= antigravitySmartRetryMaxAttempts; attempt++ {
|
// MODEL_CAPACITY_EXHAUSTED 使用独立的重试参数(60 次,固定 1s 间隔)
|
||||||
|
maxAttempts := antigravitySmartRetryMaxAttempts
|
||||||
|
if isModelCapacityExhausted {
|
||||||
|
maxAttempts = antigravityModelCapacityRetryMaxAttempts
|
||||||
|
waitDuration = antigravityModelCapacityRetryWait
|
||||||
|
}
|
||||||
|
|
||||||
|
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||||
log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
|
log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
|
||||||
p.prefix, resp.StatusCode, attempt, antigravitySmartRetryMaxAttempts, waitDuration, modelName, p.account.ID)
|
p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
|
||||||
|
|
||||||
select {
|
select {
|
||||||
case <-p.ctx.Done():
|
case <-p.ctx.Done():
|
||||||
@@ -207,13 +220,13 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
|||||||
|
|
||||||
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
|
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
|
||||||
if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
|
if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
|
||||||
log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, antigravitySmartRetryMaxAttempts)
|
log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, maxAttempts)
|
||||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
|
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 网络错误时,继续重试
|
// 网络错误时,继续重试
|
||||||
if retryErr != nil || retryResp == nil {
|
if retryErr != nil || retryResp == nil {
|
||||||
log.Printf("%s status=smart_retry_network_error attempt=%d/%d error=%v", p.prefix, attempt, antigravitySmartRetryMaxAttempts, retryErr)
|
log.Printf("%s status=smart_retry_network_error attempt=%d/%d error=%v", p.prefix, attempt, maxAttempts, retryErr)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -227,26 +240,43 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
|||||||
_ = retryResp.Body.Close()
|
_ = retryResp.Body.Close()
|
||||||
}
|
}
|
||||||
|
|
||||||
// 解析新的重试信息,用于下次重试的等待时间
|
// 解析新的重试信息,用于下次重试的等待时间(MODEL_CAPACITY_EXHAUSTED 使用固定循环,跳过)
|
||||||
if attempt < antigravitySmartRetryMaxAttempts && lastRetryBody != nil {
|
if !isModelCapacityExhausted && attempt < maxAttempts && lastRetryBody != nil {
|
||||||
newShouldRetry, _, newWaitDuration, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody)
|
newShouldRetry, _, newWaitDuration, _, _ := shouldTriggerAntigravitySmartRetry(p.account, lastRetryBody)
|
||||||
if newShouldRetry && newWaitDuration > 0 {
|
if newShouldRetry && newWaitDuration > 0 {
|
||||||
waitDuration = newWaitDuration
|
waitDuration = newWaitDuration
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 所有重试都失败,限流当前模型并切换账号
|
// 所有重试都失败
|
||||||
rateLimitDuration := waitDuration
|
|
||||||
if rateLimitDuration <= 0 {
|
|
||||||
rateLimitDuration = antigravityDefaultRateLimitDuration
|
|
||||||
}
|
|
||||||
retryBody := lastRetryBody
|
retryBody := lastRetryBody
|
||||||
if retryBody == nil {
|
if retryBody == nil {
|
||||||
retryBody = respBody
|
retryBody = respBody
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MODEL_CAPACITY_EXHAUSTED:模型容量不足,切换账号无意义
|
||||||
|
// 直接返回上游错误响应,不设置模型限流,不切换账号
|
||||||
|
if isModelCapacityExhausted {
|
||||||
|
log.Printf("%s status=%d smart_retry_exhausted_model_capacity attempts=%d model=%s account=%d body=%s (model capacity exhausted, not switching account)",
|
||||||
|
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200))
|
||||||
|
return &smartRetryResult{
|
||||||
|
action: smartRetryActionBreakWithResp,
|
||||||
|
resp: &http.Response{
|
||||||
|
StatusCode: resp.StatusCode,
|
||||||
|
Header: resp.Header.Clone(),
|
||||||
|
Body: io.NopCloser(bytes.NewReader(retryBody)),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RATE_LIMIT_EXCEEDED:账号级限流,限流当前模型并切换账号
|
||||||
|
rateLimitDuration := waitDuration
|
||||||
|
if rateLimitDuration <= 0 {
|
||||||
|
rateLimitDuration = antigravityDefaultRateLimitDuration
|
||||||
|
}
|
||||||
log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)",
|
log.Printf("%s status=%d smart_retry_exhausted attempts=%d model=%s account=%d upstream_retry_delay=%v body=%s (switch account)",
|
||||||
p.prefix, resp.StatusCode, antigravitySmartRetryMaxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200))
|
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, rateLimitDuration, truncateForLog(retryBody, 200))
|
||||||
|
|
||||||
resetAt := time.Now().Add(rateLimitDuration)
|
resetAt := time.Now().Add(rateLimitDuration)
|
||||||
if p.accountRepo != nil && modelName != "" {
|
if p.accountRepo != nil && modelName != "" {
|
||||||
@@ -2053,8 +2083,9 @@ func antigravityFallbackCooldownSeconds() (time.Duration, bool) {
|
|||||||
|
|
||||||
// antigravitySmartRetryInfo 智能重试所需的信息
|
// antigravitySmartRetryInfo 智能重试所需的信息
|
||||||
type antigravitySmartRetryInfo struct {
|
type antigravitySmartRetryInfo struct {
|
||||||
RetryDelay time.Duration // 重试延迟时间
|
RetryDelay time.Duration // 重试延迟时间
|
||||||
ModelName string // 限流的模型名称(如 "claude-sonnet-4-5")
|
ModelName string // 限流的模型名称(如 "claude-sonnet-4-5")
|
||||||
|
IsModelCapacityExhausted bool // 是否为模型容量不足(MODEL_CAPACITY_EXHAUSTED)
|
||||||
}
|
}
|
||||||
|
|
||||||
// parseAntigravitySmartRetryInfo 解析 Google RPC RetryInfo 和 ErrorInfo 信息
|
// parseAntigravitySmartRetryInfo 解析 Google RPC RetryInfo 和 ErrorInfo 信息
|
||||||
@@ -2169,31 +2200,40 @@ func parseAntigravitySmartRetryInfo(body []byte) *antigravitySmartRetryInfo {
|
|||||||
}
|
}
|
||||||
|
|
||||||
return &antigravitySmartRetryInfo{
|
return &antigravitySmartRetryInfo{
|
||||||
RetryDelay: retryDelay,
|
RetryDelay: retryDelay,
|
||||||
ModelName: modelName,
|
ModelName: modelName,
|
||||||
|
IsModelCapacityExhausted: hasModelCapacityExhausted,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// shouldTriggerAntigravitySmartRetry 判断是否应该触发智能重试
|
// shouldTriggerAntigravitySmartRetry 判断是否应该触发智能重试
|
||||||
// 返回:
|
// 返回:
|
||||||
// - shouldRetry: 是否应该智能重试(retryDelay < antigravityRateLimitThreshold)
|
// - shouldRetry: 是否应该智能重试(retryDelay < antigravityRateLimitThreshold,或 MODEL_CAPACITY_EXHAUSTED)
|
||||||
// - shouldRateLimitModel: 是否应该限流模型(retryDelay >= antigravityRateLimitThreshold)
|
// - shouldRateLimitModel: 是否应该限流模型并切换账号(仅 RATE_LIMIT_EXCEEDED 且 retryDelay >= 阈值)
|
||||||
// - waitDuration: 等待时间(智能重试时使用,shouldRateLimitModel=true 时为 0)
|
// - waitDuration: 等待时间
|
||||||
// - modelName: 限流的模型名称
|
// - modelName: 限流的模型名称
|
||||||
func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string) {
|
// - isModelCapacityExhausted: 是否为模型容量不足(MODEL_CAPACITY_EXHAUSTED)
|
||||||
|
func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shouldRetry bool, shouldRateLimitModel bool, waitDuration time.Duration, modelName string, isModelCapacityExhausted bool) {
|
||||||
if account.Platform != PlatformAntigravity {
|
if account.Platform != PlatformAntigravity {
|
||||||
return false, false, 0, ""
|
return false, false, 0, "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
info := parseAntigravitySmartRetryInfo(respBody)
|
info := parseAntigravitySmartRetryInfo(respBody)
|
||||||
if info == nil {
|
if info == nil {
|
||||||
return false, false, 0, ""
|
return false, false, 0, "", false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MODEL_CAPACITY_EXHAUSTED(模型容量不足):所有账号共享同一模型容量池
|
||||||
|
// 切换账号无意义,使用固定 1s 间隔重试
|
||||||
|
if info.IsModelCapacityExhausted {
|
||||||
|
return true, false, antigravityModelCapacityRetryWait, info.ModelName, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// RATE_LIMIT_EXCEEDED(账号级限流):
|
||||||
// retryDelay >= 阈值:直接限流模型,不重试
|
// retryDelay >= 阈值:直接限流模型,不重试
|
||||||
// 注意:如果上游未提供 retryDelay,parseAntigravitySmartRetryInfo 已设置为默认 30s
|
// 注意:如果上游未提供 retryDelay,parseAntigravitySmartRetryInfo 已设置为默认 30s
|
||||||
if info.RetryDelay >= antigravityRateLimitThreshold {
|
if info.RetryDelay >= antigravityRateLimitThreshold {
|
||||||
return false, true, info.RetryDelay, info.ModelName
|
return false, true, info.RetryDelay, info.ModelName, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// retryDelay < 阈值:智能重试
|
// retryDelay < 阈值:智能重试
|
||||||
@@ -2202,7 +2242,7 @@ func shouldTriggerAntigravitySmartRetry(account *Account, respBody []byte) (shou
|
|||||||
waitDuration = antigravitySmartRetryMinWait
|
waitDuration = antigravitySmartRetryMinWait
|
||||||
}
|
}
|
||||||
|
|
||||||
return true, false, waitDuration, info.ModelName
|
return true, false, waitDuration, info.ModelName, false
|
||||||
}
|
}
|
||||||
|
|
||||||
// handleModelRateLimitParams 模型级限流处理参数
|
// handleModelRateLimitParams 模型级限流处理参数
|
||||||
@@ -2228,8 +2268,9 @@ type handleModelRateLimitResult struct {
|
|||||||
|
|
||||||
// handleModelRateLimit 处理模型级限流(在原有逻辑之前调用)
|
// handleModelRateLimit 处理模型级限流(在原有逻辑之前调用)
|
||||||
// 仅处理 429/503,解析模型名和 retryDelay
|
// 仅处理 429/503,解析模型名和 retryDelay
|
||||||
// - retryDelay < antigravityRateLimitThreshold: 返回 ShouldRetry=true,由调用方等待后重试
|
// - MODEL_CAPACITY_EXHAUSTED: 返回 Handled=true(实际重试由 handleSmartRetry 处理)
|
||||||
// - retryDelay >= antigravityRateLimitThreshold: 设置模型限流 + 清除粘性会话 + 返回 SwitchError
|
// - RATE_LIMIT_EXCEEDED + retryDelay < 阈值: 返回 ShouldRetry=true,由调用方等待后重试
|
||||||
|
// - RATE_LIMIT_EXCEEDED + retryDelay >= 阈值: 设置模型限流 + 清除粘性会话 + 返回 SwitchError
|
||||||
func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimitParams) *handleModelRateLimitResult {
|
func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimitParams) *handleModelRateLimitResult {
|
||||||
if p.statusCode != 429 && p.statusCode != 503 {
|
if p.statusCode != 429 && p.statusCode != 503 {
|
||||||
return &handleModelRateLimitResult{Handled: false}
|
return &handleModelRateLimitResult{Handled: false}
|
||||||
@@ -2240,7 +2281,17 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit
|
|||||||
return &handleModelRateLimitResult{Handled: false}
|
return &handleModelRateLimitResult{Handled: false}
|
||||||
}
|
}
|
||||||
|
|
||||||
// < antigravityRateLimitThreshold: 等待后重试
|
// MODEL_CAPACITY_EXHAUSTED:模型容量不足,所有账号共享同一容量池
|
||||||
|
// 切换账号无意义,不设置模型限流(实际重试由 handleSmartRetry 处理)
|
||||||
|
if info.IsModelCapacityExhausted {
|
||||||
|
log.Printf("%s status=%d model_capacity_exhausted model=%s (not switching account, retry handled by smart retry)",
|
||||||
|
p.prefix, p.statusCode, info.ModelName)
|
||||||
|
return &handleModelRateLimitResult{
|
||||||
|
Handled: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RATE_LIMIT_EXCEEDED: < antigravityRateLimitThreshold: 等待后重试
|
||||||
if info.RetryDelay < antigravityRateLimitThreshold {
|
if info.RetryDelay < antigravityRateLimitThreshold {
|
||||||
log.Printf("%s status=%d model_rate_limit_wait model=%s wait=%v",
|
log.Printf("%s status=%d model_rate_limit_wait model=%s wait=%v",
|
||||||
p.prefix, p.statusCode, info.ModelName, info.RetryDelay)
|
p.prefix, p.statusCode, info.ModelName, info.RetryDelay)
|
||||||
@@ -2251,7 +2302,7 @@ func (s *AntigravityGatewayService) handleModelRateLimit(p *handleModelRateLimit
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// >= antigravityRateLimitThreshold: 设置限流 + 清除粘性会话 + 切换账号
|
// RATE_LIMIT_EXCEEDED: >= antigravityRateLimitThreshold: 设置限流 + 清除粘性会话 + 切换账号
|
||||||
s.setModelRateLimitAndClearSession(p, info)
|
s.setModelRateLimitAndClearSession(p, info)
|
||||||
|
|
||||||
return &handleModelRateLimitResult{
|
return &handleModelRateLimitResult{
|
||||||
|
|||||||
@@ -188,13 +188,14 @@ func TestHandleUpstreamError_429_NonModelRateLimit(t *testing.T) {
|
|||||||
require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey)
|
require.Equal(t, "claude-sonnet-4-5", repo.modelRateLimitCalls[0].modelKey)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestHandleUpstreamError_503_ModelRateLimit 测试 503 模型限流场景
|
// TestHandleUpstreamError_503_ModelCapacityExhausted 测试 503 模型容量不足场景
|
||||||
func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) {
|
// MODEL_CAPACITY_EXHAUSTED 时应等待重试,不切换账号
|
||||||
|
func TestHandleUpstreamError_503_ModelCapacityExhausted(t *testing.T) {
|
||||||
repo := &stubAntigravityAccountRepo{}
|
repo := &stubAntigravityAccountRepo{}
|
||||||
svc := &AntigravityGatewayService{accountRepo: repo}
|
svc := &AntigravityGatewayService{accountRepo: repo}
|
||||||
account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity}
|
account := &Account{ID: 3, Name: "acc-3", Platform: PlatformAntigravity}
|
||||||
|
|
||||||
// 503 + MODEL_CAPACITY_EXHAUSTED → 模型限流
|
// 503 + MODEL_CAPACITY_EXHAUSTED → 等待重试,不切换账号
|
||||||
body := []byte(`{
|
body := []byte(`{
|
||||||
"error": {
|
"error": {
|
||||||
"status": "UNAVAILABLE",
|
"status": "UNAVAILABLE",
|
||||||
@@ -207,13 +208,13 @@ func TestHandleUpstreamError_503_ModelRateLimit(t *testing.T) {
|
|||||||
|
|
||||||
result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false)
|
result := svc.handleUpstreamError(context.Background(), "[test]", account, http.StatusServiceUnavailable, http.Header{}, body, "gemini-3-pro-high", 0, "", false)
|
||||||
|
|
||||||
// 应该触发模型限流
|
// MODEL_CAPACITY_EXHAUSTED 应该标记为已处理,不切换账号,不设置模型限流
|
||||||
|
// 实际重试由 handleSmartRetry 处理
|
||||||
require.NotNil(t, result)
|
require.NotNil(t, result)
|
||||||
require.True(t, result.Handled)
|
require.True(t, result.Handled)
|
||||||
require.NotNil(t, result.SwitchError)
|
require.False(t, result.ShouldRetry, "MODEL_CAPACITY_EXHAUSTED should not trigger retry from handleModelRateLimit path")
|
||||||
require.Equal(t, "gemini-3-pro-high", result.SwitchError.RateLimitedModel)
|
require.Nil(t, result.SwitchError, "MODEL_CAPACITY_EXHAUSTED should not trigger account switch")
|
||||||
require.Len(t, repo.modelRateLimitCalls, 1)
|
require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit")
|
||||||
require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestHandleUpstreamError_503_NonModelRateLimit 测试 503 非模型限流场景(不处理)
|
// TestHandleUpstreamError_503_NonModelRateLimit 测试 503 非模型限流场景(不处理)
|
||||||
@@ -301,11 +302,12 @@ func TestParseGeminiRateLimitResetTime_QuotaResetDelay_RoundsUp(t *testing.T) {
|
|||||||
|
|
||||||
func TestParseAntigravitySmartRetryInfo(t *testing.T) {
|
func TestParseAntigravitySmartRetryInfo(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
body string
|
body string
|
||||||
expectedDelay time.Duration
|
expectedDelay time.Duration
|
||||||
expectedModel string
|
expectedModel string
|
||||||
expectedNil bool
|
expectedNil bool
|
||||||
|
expectedIsModelCapacityExhausted bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "valid complete response with RATE_LIMIT_EXCEEDED",
|
name: "valid complete response with RATE_LIMIT_EXCEEDED",
|
||||||
@@ -368,8 +370,9 @@ func TestParseAntigravitySmartRetryInfo(t *testing.T) {
|
|||||||
"message": "No capacity available for model gemini-3-pro-high on the server"
|
"message": "No capacity available for model gemini-3-pro-high on the server"
|
||||||
}
|
}
|
||||||
}`,
|
}`,
|
||||||
expectedDelay: 39 * time.Second,
|
expectedDelay: 39 * time.Second,
|
||||||
expectedModel: "gemini-3-pro-high",
|
expectedModel: "gemini-3-pro-high",
|
||||||
|
expectedIsModelCapacityExhausted: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "503 UNAVAILABLE without MODEL_CAPACITY_EXHAUSTED - should return nil",
|
name: "503 UNAVAILABLE without MODEL_CAPACITY_EXHAUSTED - should return nil",
|
||||||
@@ -480,6 +483,9 @@ func TestParseAntigravitySmartRetryInfo(t *testing.T) {
|
|||||||
if result.ModelName != tt.expectedModel {
|
if result.ModelName != tt.expectedModel {
|
||||||
t.Errorf("ModelName = %q, want %q", result.ModelName, tt.expectedModel)
|
t.Errorf("ModelName = %q, want %q", result.ModelName, tt.expectedModel)
|
||||||
}
|
}
|
||||||
|
if result.IsModelCapacityExhausted != tt.expectedIsModelCapacityExhausted {
|
||||||
|
t.Errorf("IsModelCapacityExhausted = %v, want %v", result.IsModelCapacityExhausted, tt.expectedIsModelCapacityExhausted)
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -491,13 +497,14 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
|
|||||||
apiKeyAccount := &Account{Type: AccountTypeAPIKey}
|
apiKeyAccount := &Account{Type: AccountTypeAPIKey}
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
account *Account
|
account *Account
|
||||||
body string
|
body string
|
||||||
expectedShouldRetry bool
|
expectedShouldRetry bool
|
||||||
expectedShouldRateLimit bool
|
expectedShouldRateLimit bool
|
||||||
minWait time.Duration
|
expectedIsModelCapacityExhausted bool
|
||||||
modelName string
|
minWait time.Duration
|
||||||
|
modelName string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
name: "OAuth account with short delay (< 7s) - smart retry",
|
name: "OAuth account with short delay (< 7s) - smart retry",
|
||||||
@@ -611,13 +618,14 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
|
|||||||
]
|
]
|
||||||
}
|
}
|
||||||
}`,
|
}`,
|
||||||
expectedShouldRetry: false,
|
expectedShouldRetry: true,
|
||||||
expectedShouldRateLimit: true,
|
expectedShouldRateLimit: false,
|
||||||
minWait: 39 * time.Second,
|
expectedIsModelCapacityExhausted: true,
|
||||||
modelName: "gemini-3-pro-high",
|
minWait: 1 * time.Second,
|
||||||
|
modelName: "gemini-3-pro-high",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use default rate limit",
|
name: "503 UNAVAILABLE with MODEL_CAPACITY_EXHAUSTED - no retryDelay - use fixed wait",
|
||||||
account: oauthAccount,
|
account: oauthAccount,
|
||||||
body: `{
|
body: `{
|
||||||
"error": {
|
"error": {
|
||||||
@@ -629,10 +637,11 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
|
|||||||
"message": "No capacity available for model gemini-2.5-flash on the server"
|
"message": "No capacity available for model gemini-2.5-flash on the server"
|
||||||
}
|
}
|
||||||
}`,
|
}`,
|
||||||
expectedShouldRetry: false,
|
expectedShouldRetry: true,
|
||||||
expectedShouldRateLimit: true,
|
expectedShouldRateLimit: false,
|
||||||
minWait: 30 * time.Second,
|
expectedIsModelCapacityExhausted: true,
|
||||||
modelName: "gemini-2.5-flash",
|
minWait: 1 * time.Second,
|
||||||
|
modelName: "gemini-2.5-flash",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "429 RESOURCE_EXHAUSTED with RATE_LIMIT_EXCEEDED - no retryDelay - use default rate limit",
|
name: "429 RESOURCE_EXHAUSTED with RATE_LIMIT_EXCEEDED - no retryDelay - use default rate limit",
|
||||||
@@ -656,13 +665,16 @@ func TestShouldTriggerAntigravitySmartRetry(t *testing.T) {
|
|||||||
|
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
shouldRetry, shouldRateLimit, wait, model := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body))
|
shouldRetry, shouldRateLimit, wait, model, isModelCapacityExhausted := shouldTriggerAntigravitySmartRetry(tt.account, []byte(tt.body))
|
||||||
if shouldRetry != tt.expectedShouldRetry {
|
if shouldRetry != tt.expectedShouldRetry {
|
||||||
t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry)
|
t.Errorf("shouldRetry = %v, want %v", shouldRetry, tt.expectedShouldRetry)
|
||||||
}
|
}
|
||||||
if shouldRateLimit != tt.expectedShouldRateLimit {
|
if shouldRateLimit != tt.expectedShouldRateLimit {
|
||||||
t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit)
|
t.Errorf("shouldRateLimit = %v, want %v", shouldRateLimit, tt.expectedShouldRateLimit)
|
||||||
}
|
}
|
||||||
|
if isModelCapacityExhausted != tt.expectedIsModelCapacityExhausted {
|
||||||
|
t.Errorf("isModelCapacityExhausted = %v, want %v", isModelCapacityExhausted, tt.expectedIsModelCapacityExhausted)
|
||||||
|
}
|
||||||
if shouldRetry {
|
if shouldRetry {
|
||||||
if wait < tt.minWait {
|
if wait < tt.minWait {
|
||||||
t.Errorf("wait = %v, want >= %v", wait, tt.minWait)
|
t.Errorf("wait = %v, want >= %v", wait, tt.minWait)
|
||||||
|
|||||||
@@ -294,8 +294,9 @@ func TestHandleSmartRetry_ShortDelay_SmartRetryFailed_ReturnsSwitchError(t *test
|
|||||||
require.Len(t, upstream.calls, 1, "should have made one retry call (max attempts)")
|
require.Len(t, upstream.calls, 1, "should have made one retry call (max attempts)")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError 测试 503 MODEL_CAPACITY_EXHAUSTED 返回 switchError
|
// TestHandleSmartRetry_503_ModelCapacityExhausted_RetrySuccess 测试 503 MODEL_CAPACITY_EXHAUSTED 重试成功
|
||||||
func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testing.T) {
|
// MODEL_CAPACITY_EXHAUSTED 使用固定 1s 间隔重试,不切换账号
|
||||||
|
func TestHandleSmartRetry_503_ModelCapacityExhausted_RetrySuccess(t *testing.T) {
|
||||||
repo := &stubAntigravityAccountRepo{}
|
repo := &stubAntigravityAccountRepo{}
|
||||||
account := &Account{
|
account := &Account{
|
||||||
ID: 3,
|
ID: 3,
|
||||||
@@ -304,7 +305,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
|
|||||||
Platform: PlatformAntigravity,
|
Platform: PlatformAntigravity,
|
||||||
}
|
}
|
||||||
|
|
||||||
// 503 + MODEL_CAPACITY_EXHAUSTED + 39s >= 7s 阈值
|
// 503 + MODEL_CAPACITY_EXHAUSTED + 39s(上游 retryDelay 应被忽略,使用固定 1s)
|
||||||
respBody := []byte(`{
|
respBody := []byte(`{
|
||||||
"error": {
|
"error": {
|
||||||
"code": 503,
|
"code": 503,
|
||||||
@@ -322,6 +323,14 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
|
|||||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// mock: 第 1 次重试返回 200 成功
|
||||||
|
upstream := &mockSmartRetryUpstream{
|
||||||
|
responses: []*http.Response{
|
||||||
|
{StatusCode: http.StatusOK, Header: http.Header{}, Body: io.NopCloser(strings.NewReader(`{"ok":true}`))},
|
||||||
|
},
|
||||||
|
errors: []error{nil},
|
||||||
|
}
|
||||||
|
|
||||||
params := antigravityRetryLoopParams{
|
params := antigravityRetryLoopParams{
|
||||||
ctx: context.Background(),
|
ctx: context.Background(),
|
||||||
prefix: "[test]",
|
prefix: "[test]",
|
||||||
@@ -330,6 +339,7 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
|
|||||||
action: "generateContent",
|
action: "generateContent",
|
||||||
body: []byte(`{"input":"test"}`),
|
body: []byte(`{"input":"test"}`),
|
||||||
accountRepo: repo,
|
accountRepo: repo,
|
||||||
|
httpUpstream: upstream,
|
||||||
isStickySession: true,
|
isStickySession: true,
|
||||||
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
|
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
|
||||||
return nil
|
return nil
|
||||||
@@ -343,16 +353,67 @@ func TestHandleSmartRetry_503_ModelCapacityExhausted_ReturnsSwitchError(t *testi
|
|||||||
|
|
||||||
require.NotNil(t, result)
|
require.NotNil(t, result)
|
||||||
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
||||||
require.Nil(t, result.resp)
|
require.NotNil(t, result.resp, "should return successful response")
|
||||||
|
require.Equal(t, http.StatusOK, result.resp.StatusCode)
|
||||||
require.Nil(t, result.err)
|
require.Nil(t, result.err)
|
||||||
require.NotNil(t, result.switchError, "should return switchError for 503 model capacity exhausted")
|
require.Nil(t, result.switchError, "MODEL_CAPACITY_EXHAUSTED should not return switchError")
|
||||||
require.Equal(t, account.ID, result.switchError.OriginalAccountID)
|
|
||||||
require.Equal(t, "gemini-3-pro-high", result.switchError.RateLimitedModel)
|
|
||||||
require.True(t, result.switchError.IsStickySession)
|
|
||||||
|
|
||||||
// 验证模型限流已设置
|
// 不应设置模型限流
|
||||||
require.Len(t, repo.modelRateLimitCalls, 1)
|
require.Empty(t, repo.modelRateLimitCalls, "MODEL_CAPACITY_EXHAUSTED should not set model rate limit")
|
||||||
require.Equal(t, "gemini-3-pro-high", repo.modelRateLimitCalls[0].modelKey)
|
require.Len(t, upstream.calls, 1, "should have made one retry call before success")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestHandleSmartRetry_503_ModelCapacityExhausted_ContextCancel 测试 MODEL_CAPACITY_EXHAUSTED 上下文取消
|
||||||
|
func TestHandleSmartRetry_503_ModelCapacityExhausted_ContextCancel(t *testing.T) {
|
||||||
|
repo := &stubAntigravityAccountRepo{}
|
||||||
|
account := &Account{
|
||||||
|
ID: 3,
|
||||||
|
Name: "acc-3",
|
||||||
|
Type: AccountTypeOAuth,
|
||||||
|
Platform: PlatformAntigravity,
|
||||||
|
}
|
||||||
|
|
||||||
|
respBody := []byte(`{
|
||||||
|
"error": {
|
||||||
|
"code": 503,
|
||||||
|
"status": "UNAVAILABLE",
|
||||||
|
"details": [
|
||||||
|
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro-high"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
||||||
|
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "39s"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}`)
|
||||||
|
resp := &http.Response{
|
||||||
|
StatusCode: http.StatusServiceUnavailable,
|
||||||
|
Header: http.Header{},
|
||||||
|
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||||
|
}
|
||||||
|
|
||||||
|
// 立即取消上下文,验证重试循环能正确退出
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
cancel()
|
||||||
|
|
||||||
|
params := antigravityRetryLoopParams{
|
||||||
|
ctx: ctx,
|
||||||
|
prefix: "[test]",
|
||||||
|
account: account,
|
||||||
|
accessToken: "token",
|
||||||
|
action: "generateContent",
|
||||||
|
body: []byte(`{"input":"test"}`),
|
||||||
|
accountRepo: repo,
|
||||||
|
handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
|
||||||
|
return nil
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
svc := &AntigravityGatewayService{}
|
||||||
|
result := svc.handleSmartRetry(params, resp, respBody, "https://ag-1.test", 0, []string{"https://ag-1.test"})
|
||||||
|
|
||||||
|
require.NotNil(t, result)
|
||||||
|
require.Equal(t, smartRetryActionBreakWithResp, result.action)
|
||||||
|
require.Error(t, result.err, "should return context error")
|
||||||
|
require.Nil(t, result.switchError, "should not return switchError on context cancel")
|
||||||
|
require.Empty(t, repo.modelRateLimitCalls, "should not set model rate limit on context cancel")
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic 测试非 Antigravity 平台账号走默认逻辑
|
// TestHandleSmartRetry_NonAntigravityAccount_ContinuesDefaultLogic 测试非 Antigravity 平台账号走默认逻辑
|
||||||
@@ -1129,20 +1190,20 @@ func TestHandleSmartRetry_ShortDelay_NetworkError_StickySession_ClearsSession(t
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession
|
// TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession
|
||||||
// 503 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定
|
// 429 + 短延迟 + 粘性会话 + 重试失败 → 清除粘性绑定
|
||||||
func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession(t *testing.T) {
|
func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession(t *testing.T) {
|
||||||
failRespBody := `{
|
failRespBody := `{
|
||||||
"error": {
|
"error": {
|
||||||
"code": 503,
|
"code": 429,
|
||||||
"status": "UNAVAILABLE",
|
"status": "RESOURCE_EXHAUSTED",
|
||||||
"details": [
|
"details": [
|
||||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"},
|
||||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
|
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}`
|
}`
|
||||||
failResp := &http.Response{
|
failResp := &http.Response{
|
||||||
StatusCode: http.StatusServiceUnavailable,
|
StatusCode: http.StatusTooManyRequests,
|
||||||
Header: http.Header{},
|
Header: http.Header{},
|
||||||
Body: io.NopCloser(strings.NewReader(failRespBody)),
|
Body: io.NopCloser(strings.NewReader(failRespBody)),
|
||||||
}
|
}
|
||||||
@@ -1162,16 +1223,16 @@ func TestHandleSmartRetry_ShortDelay_503_StickySession_FailedRetry_ClearsSession
|
|||||||
|
|
||||||
respBody := []byte(`{
|
respBody := []byte(`{
|
||||||
"error": {
|
"error": {
|
||||||
"code": 503,
|
"code": 429,
|
||||||
"status": "UNAVAILABLE",
|
"status": "RESOURCE_EXHAUSTED",
|
||||||
"details": [
|
"details": [
|
||||||
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "MODEL_CAPACITY_EXHAUSTED"},
|
{"@type": "type.googleapis.com/google.rpc.ErrorInfo", "metadata": {"model": "gemini-3-pro"}, "reason": "RATE_LIMIT_EXCEEDED"},
|
||||||
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
|
{"@type": "type.googleapis.com/google.rpc.RetryInfo", "retryDelay": "0.5s"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}`)
|
}`)
|
||||||
resp := &http.Response{
|
resp := &http.Response{
|
||||||
StatusCode: http.StatusServiceUnavailable,
|
StatusCode: http.StatusTooManyRequests,
|
||||||
Header: http.Header{},
|
Header: http.Header{},
|
||||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user