perf: 错误处理性能优化
- MatchRule 延迟/限制 body ToLower,先用 statusCode 短路,只在需要关键词匹配时转换且限制 8KB
- 预计算规则的小写关键词/平台和 error code set,消除运行时重复 ToLower 和线性扫描
- MODEL_CAPACITY_EXHAUSTED 全局去重,避免并发请求重复重试同一模型
- 503 重试 body 读取限制从 2MB 降至 8KB
- time.After 替换为 time.NewTimer,防止 context 取消时 timer 泄漏
This commit is contained in:
@@ -16,6 +16,7 @@ import (
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
@@ -66,6 +67,9 @@ const (
|
||||
// 单账号 503 退避重试:原地重试的总累计等待时间上限
|
||||
// 超过此上限将不再重试,直接返回 503
|
||||
antigravitySingleAccountSmartRetryTotalMaxWait = 30 * time.Second
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED 全局去重:重试全部失败后的 cooldown 时间
|
||||
antigravityModelCapacityCooldown = 10 * time.Second
|
||||
)
|
||||
|
||||
// antigravityPassthroughErrorMessages 透传给客户端的错误消息白名单(小写)
|
||||
@@ -74,6 +78,12 @@ var antigravityPassthroughErrorMessages = []string{
|
||||
"prompt is too long",
|
||||
}
|
||||
|
||||
// MODEL_CAPACITY_EXHAUSTED 全局去重:避免多个并发请求同时对同一模型进行容量耗尽重试
|
||||
var (
|
||||
modelCapacityExhaustedMu sync.RWMutex
|
||||
modelCapacityExhaustedUntil = make(map[string]time.Time) // modelName -> cooldown until
|
||||
)
|
||||
|
||||
const (
|
||||
antigravityBillingModelEnv = "GATEWAY_ANTIGRAVITY_BILL_WITH_MAPPED_MODEL"
|
||||
antigravityFallbackSecondsEnv = "GATEWAY_ANTIGRAVITY_FALLBACK_COOLDOWN_SECONDS"
|
||||
@@ -211,17 +221,38 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
if isModelCapacityExhausted {
|
||||
maxAttempts = antigravityModelCapacityRetryMaxAttempts
|
||||
waitDuration = antigravityModelCapacityRetryWait
|
||||
|
||||
// 全局去重:如果其他 goroutine 已在重试同一模型且尚在 cooldown 中,直接返回 503
|
||||
if modelName != "" {
|
||||
modelCapacityExhaustedMu.RLock()
|
||||
cooldownUntil, exists := modelCapacityExhaustedUntil[modelName]
|
||||
modelCapacityExhaustedMu.RUnlock()
|
||||
if exists && time.Now().Before(cooldownUntil) {
|
||||
log.Printf("%s status=%d model_capacity_exhausted_dedup model=%s account=%d cooldown_until=%v (skip retry)",
|
||||
p.prefix, resp.StatusCode, modelName, p.account.ID, cooldownUntil.Format("15:04:05"))
|
||||
return &smartRetryResult{
|
||||
action: smartRetryActionBreakWithResp,
|
||||
resp: &http.Response{
|
||||
StatusCode: resp.StatusCode,
|
||||
Header: resp.Header.Clone(),
|
||||
Body: io.NopCloser(bytes.NewReader(respBody)),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for attempt := 1; attempt <= maxAttempts; attempt++ {
|
||||
log.Printf("%s status=%d oauth_smart_retry attempt=%d/%d delay=%v model=%s account=%d",
|
||||
p.prefix, resp.StatusCode, attempt, maxAttempts, waitDuration, modelName, p.account.ID)
|
||||
|
||||
timer := time.NewTimer(waitDuration)
|
||||
select {
|
||||
case <-p.ctx.Done():
|
||||
timer.Stop()
|
||||
log.Printf("%s status=context_canceled_during_smart_retry", p.prefix)
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
|
||||
case <-time.After(waitDuration):
|
||||
case <-timer.C:
|
||||
}
|
||||
|
||||
// 智能重试:创建新请求
|
||||
@@ -242,6 +273,12 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
retryResp, retryErr := p.httpUpstream.Do(retryReq, p.proxyURL, p.account.ID, p.account.Concurrency)
|
||||
if retryErr == nil && retryResp != nil && retryResp.StatusCode != http.StatusTooManyRequests && retryResp.StatusCode != http.StatusServiceUnavailable {
|
||||
log.Printf("%s status=%d smart_retry_success attempt=%d/%d", p.prefix, retryResp.StatusCode, attempt, maxAttempts)
|
||||
// 重试成功,清除 MODEL_CAPACITY_EXHAUSTED cooldown
|
||||
if isModelCapacityExhausted && modelName != "" {
|
||||
modelCapacityExhaustedMu.Lock()
|
||||
delete(modelCapacityExhaustedUntil, modelName)
|
||||
modelCapacityExhaustedMu.Unlock()
|
||||
}
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, resp: retryResp}
|
||||
}
|
||||
|
||||
@@ -257,7 +294,7 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
}
|
||||
lastRetryResp = retryResp
|
||||
if retryResp != nil {
|
||||
lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
|
||||
lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 8<<10))
|
||||
_ = retryResp.Body.Close()
|
||||
}
|
||||
|
||||
@@ -283,6 +320,12 @@ func (s *AntigravityGatewayService) handleSmartRetry(p antigravityRetryLoopParam
|
||||
// MODEL_CAPACITY_EXHAUSTED:模型容量不足,切换账号无意义
|
||||
// 直接返回上游错误响应,不设置模型限流,不切换账号
|
||||
if isModelCapacityExhausted {
|
||||
// 设置 cooldown,让后续请求快速失败,避免重复重试
|
||||
if modelName != "" {
|
||||
modelCapacityExhaustedMu.Lock()
|
||||
modelCapacityExhaustedUntil[modelName] = time.Now().Add(antigravityModelCapacityCooldown)
|
||||
modelCapacityExhaustedMu.Unlock()
|
||||
}
|
||||
log.Printf("%s status=%d smart_retry_exhausted_model_capacity attempts=%d model=%s account=%d body=%s (model capacity exhausted, not switching account)",
|
||||
p.prefix, resp.StatusCode, maxAttempts, modelName, p.account.ID, truncateForLog(retryBody, 200))
|
||||
return &smartRetryResult{
|
||||
@@ -395,11 +438,13 @@ func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace(
|
||||
log.Printf("%s status=%d single_account_503_retry attempt=%d/%d delay=%v total_waited=%v model=%s account=%d",
|
||||
p.prefix, resp.StatusCode, attempt, antigravitySingleAccountSmartRetryMaxAttempts, waitDuration, totalWaited, modelName, p.account.ID)
|
||||
|
||||
timer := time.NewTimer(waitDuration)
|
||||
select {
|
||||
case <-p.ctx.Done():
|
||||
timer.Stop()
|
||||
log.Printf("%s status=context_canceled_during_single_account_retry", p.prefix)
|
||||
return &smartRetryResult{action: smartRetryActionBreakWithResp, err: p.ctx.Err()}
|
||||
case <-time.After(waitDuration):
|
||||
case <-timer.C:
|
||||
}
|
||||
totalWaited += waitDuration
|
||||
|
||||
@@ -433,7 +478,7 @@ func (s *AntigravityGatewayService) handleSingleAccountRetryInPlace(
|
||||
_ = lastRetryResp.Body.Close()
|
||||
}
|
||||
lastRetryResp = retryResp
|
||||
lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
|
||||
lastRetryBody, _ = io.ReadAll(io.LimitReader(retryResp.Body, 8<<10))
|
||||
_ = retryResp.Body.Close()
|
||||
|
||||
// 解析新的重试信息,更新下次等待时间
|
||||
@@ -1404,7 +1449,7 @@ func (s *AntigravityGatewayService) Forward(ctx context.Context, c *gin.Context,
|
||||
break
|
||||
}
|
||||
|
||||
retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 2<<20))
|
||||
retryBody, _ := io.ReadAll(io.LimitReader(retryResp.Body, 8<<10))
|
||||
_ = retryResp.Body.Close()
|
||||
if retryResp.StatusCode == http.StatusTooManyRequests {
|
||||
retryBaseURL := ""
|
||||
@@ -2211,10 +2256,12 @@ func sleepAntigravityBackoffWithContext(ctx context.Context, attempt int) bool {
|
||||
sleepFor = 0
|
||||
}
|
||||
|
||||
timer := time.NewTimer(sleepFor)
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
timer.Stop()
|
||||
return false
|
||||
case <-time.After(sleepFor):
|
||||
case <-timer.C:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user