refactor: simplify sticky session rate limit handling — switch immediately on any rate limit
Remove threshold-based waiting from both the sticky-session path and the Antigravity pre-check path. When the requested model is rate-limited, immediately clear the sticky session and switch accounts, no matter how short the remaining rate-limit time is, instead of waiting for the limit to expire.
This commit is contained in:
@@ -264,27 +264,15 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
|||||||
|
|
||||||
// antigravityRetryLoop 执行带 URL fallback 的重试循环
|
// antigravityRetryLoop 执行带 URL fallback 的重试循环
|
||||||
func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) {
|
func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopParams) (*antigravityRetryLoopResult, error) {
|
||||||
// 预检查:如果账号已限流,根据剩余时间决定等待或切换
|
// 预检查:如果账号已限流,直接返回切换信号
|
||||||
if p.requestedModel != "" {
|
if p.requestedModel != "" {
|
||||||
if remaining := p.account.GetRateLimitRemainingTimeWithContext(p.ctx, p.requestedModel); remaining > 0 {
|
if remaining := p.account.GetRateLimitRemainingTimeWithContext(p.ctx, p.requestedModel); remaining > 0 {
|
||||||
if remaining < antigravityRateLimitThreshold {
|
log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d",
|
||||||
// 限流剩余时间较短,等待后继续
|
p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
|
||||||
log.Printf("%s pre_check: rate_limit_wait remaining=%v model=%s account=%d",
|
return nil, &AntigravityAccountSwitchError{
|
||||||
p.prefix, remaining.Truncate(time.Millisecond), p.requestedModel, p.account.ID)
|
OriginalAccountID: p.account.ID,
|
||||||
select {
|
RateLimitedModel: p.requestedModel,
|
||||||
case <-p.ctx.Done():
|
IsStickySession: p.isStickySession,
|
||||||
return nil, p.ctx.Err()
|
|
||||||
case <-time.After(remaining):
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// 限流剩余时间较长,返回账号切换信号
|
|
||||||
log.Printf("%s pre_check: rate_limit_switch remaining=%v model=%s account=%d",
|
|
||||||
p.prefix, remaining.Truncate(time.Second), p.requestedModel, p.account.ID)
|
|
||||||
return nil, &AntigravityAccountSwitchError{
|
|
||||||
OriginalAccountID: p.account.ID,
|
|
||||||
RateLimitedModel: p.requestedModel,
|
|
||||||
IsStickySession: p.isStickySession,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -803,7 +803,7 @@ func TestSetModelRateLimitByModelName_NotConvertToScope(t *testing.T) {
|
|||||||
require.NotEqual(t, "claude_sonnet", call.modelKey, "should NOT be scope")
|
require.NotEqual(t, "claude_sonnet", call.modelKey, "should NOT be scope")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAntigravityRetryLoop_PreCheck_WaitsWhenRemainingBelowThreshold(t *testing.T) {
|
func TestAntigravityRetryLoop_PreCheck_SwitchesWhenRateLimited(t *testing.T) {
|
||||||
upstream := &recordingOKUpstream{}
|
upstream := &recordingOKUpstream{}
|
||||||
account := &Account{
|
account := &Account{
|
||||||
ID: 1,
|
ID: 1,
|
||||||
@@ -815,19 +815,15 @@ func TestAntigravityRetryLoop_PreCheck_WaitsWhenRemainingBelowThreshold(t *testi
|
|||||||
Extra: map[string]any{
|
Extra: map[string]any{
|
||||||
modelRateLimitsKey: map[string]any{
|
modelRateLimitsKey: map[string]any{
|
||||||
"claude-sonnet-4-5": map[string]any{
|
"claude-sonnet-4-5": map[string]any{
|
||||||
// RFC3339 here is second-precision; keep it safely in the future.
|
|
||||||
"rate_limit_reset_at": time.Now().Add(2 * time.Second).Format(time.RFC3339),
|
"rate_limit_reset_at": time.Now().Add(2 * time.Second).Format(time.RFC3339),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Millisecond)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
svc := &AntigravityGatewayService{}
|
svc := &AntigravityGatewayService{}
|
||||||
result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
|
result, err := svc.antigravityRetryLoop(antigravityRetryLoopParams{
|
||||||
ctx: ctx,
|
ctx: context.Background(),
|
||||||
prefix: "[test]",
|
prefix: "[test]",
|
||||||
account: account,
|
account: account,
|
||||||
accessToken: "token",
|
accessToken: "token",
|
||||||
@@ -841,12 +837,16 @@ func TestAntigravityRetryLoop_PreCheck_WaitsWhenRemainingBelowThreshold(t *testi
|
|||||||
},
|
},
|
||||||
})
|
})
|
||||||
|
|
||||||
require.ErrorIs(t, err, context.DeadlineExceeded)
|
|
||||||
require.Nil(t, result)
|
require.Nil(t, result)
|
||||||
require.Equal(t, 0, upstream.calls, "should not call upstream while waiting on pre-check")
|
var switchErr *AntigravityAccountSwitchError
|
||||||
|
require.ErrorAs(t, err, &switchErr)
|
||||||
|
require.Equal(t, account.ID, switchErr.OriginalAccountID)
|
||||||
|
require.Equal(t, "claude-sonnet-4-5", switchErr.RateLimitedModel)
|
||||||
|
require.True(t, switchErr.IsStickySession)
|
||||||
|
require.Equal(t, 0, upstream.calls, "should not call upstream when switching on pre-check")
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestAntigravityRetryLoop_PreCheck_SwitchesWhenRemainingAtOrAboveThreshold(t *testing.T) {
|
func TestAntigravityRetryLoop_PreCheck_SwitchesWhenRemainingLong(t *testing.T) {
|
||||||
upstream := &recordingOKUpstream{}
|
upstream := &recordingOKUpstream{}
|
||||||
account := &Account{
|
account := &Account{
|
||||||
ID: 2,
|
ID: 2,
|
||||||
|
|||||||
@@ -323,21 +323,15 @@ func derefGroupID(groupID *int64) int64 {
|
|||||||
return *groupID
|
return *groupID
|
||||||
}
|
}
|
||||||
|
|
||||||
// stickySessionRateLimitThreshold 定义清除粘性会话的限流时间阈值。
|
|
||||||
// 当账号限流剩余时间超过此阈值时,清除粘性会话以便切换到其他账号。
|
|
||||||
// 低于此阈值时保持粘性会话,等待短暂限流结束。
|
|
||||||
const stickySessionRateLimitThreshold = 10 * time.Second
|
|
||||||
|
|
||||||
// shouldClearStickySession 检查账号是否处于不可调度状态,需要清理粘性会话绑定。
|
// shouldClearStickySession 检查账号是否处于不可调度状态,需要清理粘性会话绑定。
|
||||||
// 当账号状态为错误、禁用、不可调度、处于临时不可调度期间,
|
// 当账号状态为错误、禁用、不可调度、处于临时不可调度期间,
|
||||||
// 或模型限流剩余时间超过 stickySessionRateLimitThreshold 时,返回 true。
|
// 或请求的模型处于限流状态时,返回 true。
|
||||||
// 这确保后续请求不会继续使用不可用的账号。
|
// 这确保后续请求不会继续使用不可用的账号。
|
||||||
//
|
//
|
||||||
// shouldClearStickySession checks if an account is in an unschedulable state
|
// shouldClearStickySession checks if an account is in an unschedulable state
|
||||||
// and the sticky session binding should be cleared.
|
// and the sticky session binding should be cleared.
|
||||||
// Returns true when account status is error/disabled, schedulable is false,
|
// Returns true when account status is error/disabled, schedulable is false,
|
||||||
// within temporary unschedulable period, or model rate limit remaining time
|
// within temporary unschedulable period, or the requested model is rate-limited.
|
||||||
// exceeds stickySessionRateLimitThreshold.
|
|
||||||
// This ensures subsequent requests won't continue using unavailable accounts.
|
// This ensures subsequent requests won't continue using unavailable accounts.
|
||||||
func shouldClearStickySession(account *Account, requestedModel string) bool {
|
func shouldClearStickySession(account *Account, requestedModel string) bool {
|
||||||
if account == nil {
|
if account == nil {
|
||||||
@@ -349,8 +343,8 @@ func shouldClearStickySession(account *Account, requestedModel string) bool {
|
|||||||
if account.TempUnschedulableUntil != nil && time.Now().Before(*account.TempUnschedulableUntil) {
|
if account.TempUnschedulableUntil != nil && time.Now().Before(*account.TempUnschedulableUntil) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
// 检查模型限流和 scope 限流,只在超过阈值时清除粘性会话
|
// 检查模型限流和 scope 限流,有限流即清除粘性会话
|
||||||
if remaining := account.GetRateLimitRemainingTimeWithContext(context.Background(), requestedModel); remaining > stickySessionRateLimitThreshold {
|
if remaining := account.GetRateLimitRemainingTimeWithContext(context.Background(), requestedModel); remaining > 0 {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
|
|||||||
@@ -23,8 +23,7 @@ import (
|
|||||||
// - 临时不可调度且未过期:清理
|
// - 临时不可调度且未过期:清理
|
||||||
// - 临时不可调度已过期:不清理
|
// - 临时不可调度已过期:不清理
|
||||||
// - 正常可调度状态:不清理
|
// - 正常可调度状态:不清理
|
||||||
// - 模型限流超过阈值:清理
|
// - 模型限流(任意时长):清理
|
||||||
// - 模型限流未超过阈值:不清理
|
|
||||||
//
|
//
|
||||||
// TestShouldClearStickySession tests the sticky session clearing logic.
|
// TestShouldClearStickySession tests the sticky session clearing logic.
|
||||||
// Verifies correct behavior for various account states including:
|
// Verifies correct behavior for various account states including:
|
||||||
@@ -35,9 +34,9 @@ func TestShouldClearStickySession(t *testing.T) {
|
|||||||
future := now.Add(1 * time.Hour)
|
future := now.Add(1 * time.Hour)
|
||||||
past := now.Add(-1 * time.Hour)
|
past := now.Add(-1 * time.Hour)
|
||||||
|
|
||||||
// 短限流时间(低于阈值,不应清除粘性会话)
|
// 短限流时间(有限流即清除粘性会话)
|
||||||
shortRateLimitReset := now.Add(5 * time.Second).Format(time.RFC3339)
|
shortRateLimitReset := now.Add(5 * time.Second).Format(time.RFC3339)
|
||||||
// 长限流时间(超过阈值,应清除粘性会话)
|
// 长限流时间(有限流即清除粘性会话)
|
||||||
longRateLimitReset := now.Add(30 * time.Second).Format(time.RFC3339)
|
longRateLimitReset := now.Add(30 * time.Second).Format(time.RFC3339)
|
||||||
|
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
@@ -53,7 +52,7 @@ func TestShouldClearStickySession(t *testing.T) {
|
|||||||
{name: "temp unschedulable", account: &Account{Status: StatusActive, Schedulable: true, TempUnschedulableUntil: &future}, requestedModel: "", want: true},
|
{name: "temp unschedulable", account: &Account{Status: StatusActive, Schedulable: true, TempUnschedulableUntil: &future}, requestedModel: "", want: true},
|
||||||
{name: "temp unschedulable expired", account: &Account{Status: StatusActive, Schedulable: true, TempUnschedulableUntil: &past}, requestedModel: "", want: false},
|
{name: "temp unschedulable expired", account: &Account{Status: StatusActive, Schedulable: true, TempUnschedulableUntil: &past}, requestedModel: "", want: false},
|
||||||
{name: "active schedulable", account: &Account{Status: StatusActive, Schedulable: true}, requestedModel: "", want: false},
|
{name: "active schedulable", account: &Account{Status: StatusActive, Schedulable: true}, requestedModel: "", want: false},
|
||||||
// 模型限流测试
|
// 模型限流测试:有限流即清除
|
||||||
{
|
{
|
||||||
name: "model rate limited short duration",
|
name: "model rate limited short duration",
|
||||||
account: &Account{
|
account: &Account{
|
||||||
@@ -68,7 +67,7 @@ func TestShouldClearStickySession(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
requestedModel: "claude-sonnet-4",
|
requestedModel: "claude-sonnet-4",
|
||||||
want: false, // 低于阈值,不清除
|
want: true, // 有限流即清除
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "model rate limited long duration",
|
name: "model rate limited long duration",
|
||||||
@@ -84,7 +83,7 @@ func TestShouldClearStickySession(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
requestedModel: "claude-sonnet-4",
|
requestedModel: "claude-sonnet-4",
|
||||||
want: true, // 超过阈值,清除
|
want: true, // 有限流即清除
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "model rate limited different model",
|
name: "model rate limited different model",
|
||||||
|
|||||||
Reference in New Issue
Block a user