fix: ensure sticky session failover triggers cache billing exemption
This commit is contained in:
@@ -341,7 +341,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
|||||||
if errors.As(err, &failoverErr) {
|
if errors.As(err, &failoverErr) {
|
||||||
failedAccountIDs[account.ID] = struct{}{}
|
failedAccountIDs[account.ID] = struct{}{}
|
||||||
lastFailoverErr = failoverErr
|
lastFailoverErr = failoverErr
|
||||||
if failoverErr.ForceCacheBilling {
|
if needForceCacheBilling(hasBoundSession, failoverErr) {
|
||||||
forceCacheBilling = true
|
forceCacheBilling = true
|
||||||
}
|
}
|
||||||
if switchCount >= maxAccountSwitches {
|
if switchCount >= maxAccountSwitches {
|
||||||
@@ -541,7 +541,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) {
|
|||||||
if errors.As(err, &failoverErr) {
|
if errors.As(err, &failoverErr) {
|
||||||
failedAccountIDs[account.ID] = struct{}{}
|
failedAccountIDs[account.ID] = struct{}{}
|
||||||
lastFailoverErr = failoverErr
|
lastFailoverErr = failoverErr
|
||||||
if failoverErr.ForceCacheBilling {
|
if needForceCacheBilling(hasBoundSession, failoverErr) {
|
||||||
forceCacheBilling = true
|
forceCacheBilling = true
|
||||||
}
|
}
|
||||||
if switchCount >= maxAccountSwitches {
|
if switchCount >= maxAccountSwitches {
|
||||||
@@ -817,6 +817,12 @@ func (h *GatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotT
|
|||||||
fmt.Sprintf("Concurrency limit exceeded for %s, please retry later", slotType), streamStarted)
|
fmt.Sprintf("Concurrency limit exceeded for %s, please retry later", slotType), streamStarted)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// needForceCacheBilling 判断 failover 时是否需要强制缓存计费
|
||||||
|
// 粘性会话切换账号、或上游明确标记时,将 input_tokens 转为 cache_read 计费
|
||||||
|
func needForceCacheBilling(hasBoundSession bool, failoverErr *service.UpstreamFailoverError) bool {
|
||||||
|
return hasBoundSession || (failoverErr != nil && failoverErr.ForceCacheBilling)
|
||||||
|
}
|
||||||
|
|
||||||
// sleepFailoverDelay 账号切换线性递增延时:第1次0s、第2次1s、第3次2s…
|
// sleepFailoverDelay 账号切换线性递增延时:第1次0s、第2次1s、第3次2s…
|
||||||
// 返回 false 表示 context 已取消。
|
// 返回 false 表示 context 已取消。
|
||||||
func sleepFailoverDelay(ctx context.Context, switchCount int) bool {
|
func sleepFailoverDelay(ctx context.Context, switchCount int) bool {
|
||||||
|
|||||||
@@ -422,7 +422,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) {
|
|||||||
var failoverErr *service.UpstreamFailoverError
|
var failoverErr *service.UpstreamFailoverError
|
||||||
if errors.As(err, &failoverErr) {
|
if errors.As(err, &failoverErr) {
|
||||||
failedAccountIDs[account.ID] = struct{}{}
|
failedAccountIDs[account.ID] = struct{}{}
|
||||||
if failoverErr.ForceCacheBilling {
|
if needForceCacheBilling(hasBoundSession, failoverErr) {
|
||||||
forceCacheBilling = true
|
forceCacheBilling = true
|
||||||
}
|
}
|
||||||
if switchCount >= maxAccountSwitches {
|
if switchCount >= maxAccountSwitches {
|
||||||
|
|||||||
Reference in New Issue
Block a user