From 9f61407bf05e489fb6631ea875b25979b2b44b71 Mon Sep 17 00:00:00 2001 From: Seefs Date: Wed, 25 Mar 2026 13:11:51 +0800 Subject: [PATCH] fix: restore pre-3400 OpenRouter billing semantics --- service/text_quota.go | 8 ++- service/text_quota_test.go | 111 +++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 3 deletions(-) diff --git a/service/text_quota.go b/service/text_quota.go index a300097e..6fe37997 100644 --- a/service/text_quota.go +++ b/service/text_quota.go @@ -113,8 +113,10 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf summary.ImageTokens = usage.PromptTokensDetails.ImageTokens summary.AudioTokens = usage.PromptTokensDetails.AudioTokens legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage) + isOpenRouter := relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter + isOpenRouterClaudeBilling := isOpenRouter && summary.IsClaudeUsageSemantic - if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter { + if isOpenRouterClaudeBilling { summary.PromptTokens -= summary.CacheTokens isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio) if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings { @@ -197,7 +199,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf var cachedTokensWithRatio decimal.Decimal if !dCacheTokens.IsZero() { - if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived { + if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter { baseTokens = baseTokens.Sub(dCacheTokens) } cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio) @@ -206,7 +208,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf var cachedCreationTokensWithRatio decimal.Decimal hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0 if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens { - if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived { + if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter { baseTokens = baseTokens.Sub(dCachedCreationTokens) cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio) } else { diff --git a/service/text_quota_test.go b/service/text_quota_test.go index 4370b16e..734eacf9 100644 --- a/service/text_quota_test.go +++ b/service/text_quota_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/QuantumNous/new-api/constant" "github.com/QuantumNous/new-api/dto" relaycommon "github.com/QuantumNous/new-api/relay/common" "github.com/QuantumNous/new-api/types" @@ -204,3 +205,113 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi // 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624 require.Equal(t, 1624, summary.Quota) } + +func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheTokensFromPrompt(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + OriginModelName: "openai/gpt-4.1", + ChannelMeta: &relaycommon.ChannelMeta{ + ChannelType: constant.ChannelTypeOpenRouter, + }, + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 1, + CacheRatio: 0.1, + CacheCreationRatio: 1.25, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 2604, + CompletionTokens: 383, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 2432, + }, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + // OpenRouter usage is already normalized. prompt_tokens should stay intact. + // quota = 2604 + 2432*0.1 + 383 = 3230.2 => 3230 + require.Equal(t, 2604, summary.PromptTokens) + require.Equal(t, 3230, summary.Quota) +} + +func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheCreationTokensFromPrompt(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + OriginModelName: "openai/gpt-4.1", + ChannelMeta: &relaycommon.ChannelMeta{ + ChannelType: constant.ChannelTypeOpenRouter, + }, + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 1, + CacheCreationRatio: 1.25, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 2604, + CompletionTokens: 383, + PromptTokensDetails: dto.InputTokenDetails{ + CachedCreationTokens: 100, + }, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + // OpenRouter usage is already normalized. prompt_tokens should stay intact. + // quota = 2604 + 100*1.25 + 383 = 3112 + require.Equal(t, 2604, summary.PromptTokens) + require.Equal(t, 3112, summary.Quota) +} + +func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + FinalRequestRelayFormat: types.RelayFormatClaude, + OriginModelName: "anthropic/claude-3.7-sonnet", + ChannelMeta: &relaycommon.ChannelMeta{ + ChannelType: constant.ChannelTypeOpenRouter, + }, + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 1, + CacheRatio: 0.1, + CacheCreationRatio: 1.25, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 2604, + CompletionTokens: 383, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 2432, + }, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + // Pre-PR PostClaudeConsumeQuota behavior for OpenRouter: + // prompt = 2604 - 2432 = 172 + // quota = 172 + 2432*0.1 + 383 = 798.2 => 798 + require.True(t, summary.IsClaudeUsageSemantic) + require.Equal(t, 172, summary.PromptTokens) + require.Equal(t, 798, summary.Quota) +}