From 72b08f9cc56939bc9e0834730709088bea932bad Mon Sep 17 00:00:00 2001 From: erio Date: Mon, 9 Feb 2026 06:57:07 +0800 Subject: [PATCH] fix: ensure sticky session failover triggers cache billing exemption --- backend/internal/handler/gateway_handler.go | 10 ++++++++-- backend/internal/handler/gemini_v1beta_handler.go | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/backend/internal/handler/gateway_handler.go b/backend/internal/handler/gateway_handler.go index ebf61f37..6900fa55 100644 --- a/backend/internal/handler/gateway_handler.go +++ b/backend/internal/handler/gateway_handler.go @@ -341,7 +341,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { if errors.As(err, &failoverErr) { failedAccountIDs[account.ID] = struct{}{} lastFailoverErr = failoverErr - if failoverErr.ForceCacheBilling { + if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { @@ -541,7 +541,7 @@ func (h *GatewayHandler) Messages(c *gin.Context) { if errors.As(err, &failoverErr) { failedAccountIDs[account.ID] = struct{}{} lastFailoverErr = failoverErr - if failoverErr.ForceCacheBilling { + if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } if switchCount >= maxAccountSwitches { @@ -817,6 +817,12 @@ func (h *GatewayHandler) handleConcurrencyError(c *gin.Context, err error, slotT fmt.Sprintf("Concurrency limit exceeded for %s, please retry later", slotType), streamStarted) } +// needForceCacheBilling reports whether a failover must force cache billing: true when hasBoundSession is set, or when failoverErr is non-nil and has ForceCacheBilling set. +// Intended for sticky-session account switches or an explicit upstream flag, where input_tokens are billed as cache_read instead. +func needForceCacheBilling(hasBoundSession bool, failoverErr *service.UpstreamFailoverError) bool { + return hasBoundSession || (failoverErr != nil && failoverErr.ForceCacheBilling) +} + // sleepFailoverDelay 账号切换线性递增延时:第1次0s、第2次1s、第3次2s… // 返回 false 表示 context 已取消。 func sleepFailoverDelay(ctx context.Context, switchCount int) bool { diff --git a/backend/internal/handler/gemini_v1beta_handler.go 
b/backend/internal/handler/gemini_v1beta_handler.go index 0bf7e95f..d29749c7 100644 --- a/backend/internal/handler/gemini_v1beta_handler.go +++ b/backend/internal/handler/gemini_v1beta_handler.go @@ -422,7 +422,7 @@ func (h *GatewayHandler) GeminiV1BetaModels(c *gin.Context) { var failoverErr *service.UpstreamFailoverError if errors.As(err, &failoverErr) { failedAccountIDs[account.ID] = struct{}{} - if failoverErr.ForceCacheBilling { + if needForceCacheBilling(hasBoundSession, failoverErr) { forceCacheBilling = true } if switchCount >= maxAccountSwitches {