From 9f61407bf05e489fb6631ea875b25979b2b44b71 Mon Sep 17 00:00:00 2001 From: Seefs Date: Wed, 25 Mar 2026 13:11:51 +0800 Subject: [PATCH 1/3] fix: restore pre-3400 OpenRouter billing semantics --- service/text_quota.go | 8 ++- service/text_quota_test.go | 111 +++++++++++++++++++++++++++++++++++++ 2 files changed, 116 insertions(+), 3 deletions(-) diff --git a/service/text_quota.go b/service/text_quota.go index a300097e..6fe37997 100644 --- a/service/text_quota.go +++ b/service/text_quota.go @@ -113,8 +113,10 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf summary.ImageTokens = usage.PromptTokensDetails.ImageTokens summary.AudioTokens = usage.PromptTokensDetails.AudioTokens legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage) + isOpenRouter := relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter + isOpenRouterClaudeBilling := isOpenRouter && summary.IsClaudeUsageSemantic - if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter { + if isOpenRouterClaudeBilling { summary.PromptTokens -= summary.CacheTokens isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio) if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings { @@ -197,7 +199,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf var cachedTokensWithRatio decimal.Decimal if !dCacheTokens.IsZero() { - if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived { + if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter { baseTokens = baseTokens.Sub(dCacheTokens) } cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio) @@ -206,7 +208,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf var cachedCreationTokensWithRatio decimal.Decimal hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0 if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens { - if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived { + if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter { baseTokens = baseTokens.Sub(dCachedCreationTokens) cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio) } else { diff --git a/service/text_quota_test.go b/service/text_quota_test.go index 4370b16e..734eacf9 100644 --- a/service/text_quota_test.go +++ b/service/text_quota_test.go @@ -5,6 +5,7 @@ import ( "testing" "time" + "github.com/QuantumNous/new-api/constant" "github.com/QuantumNous/new-api/dto" relaycommon "github.com/QuantumNous/new-api/relay/common" "github.com/QuantumNous/new-api/types" @@ -204,3 +205,113 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi // 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624 require.Equal(t, 1624, summary.Quota) } + +func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheTokensFromPrompt(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + OriginModelName: "openai/gpt-4.1", + ChannelMeta: &relaycommon.ChannelMeta{ + ChannelType: constant.ChannelTypeOpenRouter, + }, + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 1, + CacheRatio: 0.1, + CacheCreationRatio: 1.25, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 2604, + CompletionTokens: 383, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 2432, + }, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + // OpenRouter usage is already normalized. prompt_tokens should stay intact. + // quota = 2604 + 2432*0.1 + 383 = 3230.2 => 3230 + require.Equal(t, 2604, summary.PromptTokens) + require.Equal(t, 3230, summary.Quota) +} + +func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheCreationTokensFromPrompt(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + OriginModelName: "openai/gpt-4.1", + ChannelMeta: &relaycommon.ChannelMeta{ + ChannelType: constant.ChannelTypeOpenRouter, + }, + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 1, + CacheCreationRatio: 1.25, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 2604, + CompletionTokens: 383, + PromptTokensDetails: dto.InputTokenDetails{ + CachedCreationTokens: 100, + }, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + // OpenRouter usage is already normalized. prompt_tokens should stay intact. + // quota = 2604 + 100*1.25 + 383 = 3112 + require.Equal(t, 2604, summary.PromptTokens) + require.Equal(t, 3112, summary.Quota) +} + +func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + FinalRequestRelayFormat: types.RelayFormatClaude, + OriginModelName: "anthropic/claude-3.7-sonnet", + ChannelMeta: &relaycommon.ChannelMeta{ + ChannelType: constant.ChannelTypeOpenRouter, + }, + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 1, + CacheRatio: 0.1, + CacheCreationRatio: 1.25, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 2604, + CompletionTokens: 383, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 2432, + }, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + // Pre-PR PostClaudeConsumeQuota behavior for OpenRouter: + // prompt = 2604 - 2432 = 172 + // quota = 172 + 2432*0.1 + 383 = 798.2 => 798 + require.True(t, summary.IsClaudeUsageSemantic) + require.Equal(t, 172, summary.PromptTokens) + require.Equal(t, 798, summary.Quota) +} From d4a470a638e6ecbd7e5ab75351293f32622bc148 Mon Sep 17 00:00:00 2001 From: Seefs Date: Wed, 25 Mar 2026 13:24:52 +0800 Subject: [PATCH 2/3] fix: restore pre-3400 OpenRouter billing semantics --- service/text_quota.go | 9 +++++---- service/text_quota_test.go | 17 +++++++++-------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/service/text_quota.go b/service/text_quota.go index 6fe37997..8caee8f2 100644 --- a/service/text_quota.go +++ b/service/text_quota.go @@ -113,8 +113,9 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf summary.ImageTokens = usage.PromptTokensDetails.ImageTokens summary.AudioTokens = usage.PromptTokensDetails.AudioTokens legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage) - isOpenRouter := relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter - isOpenRouterClaudeBilling := isOpenRouter && summary.IsClaudeUsageSemantic + isOpenRouterClaudeBilling := relayInfo.ChannelMeta != nil && + relayInfo.ChannelType == constant.ChannelTypeOpenRouter && + summary.IsClaudeUsageSemantic if isOpenRouterClaudeBilling { summary.PromptTokens -= summary.CacheTokens @@ -199,7 +200,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf var cachedTokensWithRatio decimal.Decimal if !dCacheTokens.IsZero() { - if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter { + if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived { baseTokens = baseTokens.Sub(dCacheTokens) } cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio) @@ -208,7 +209,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf var cachedCreationTokensWithRatio decimal.Decimal hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0 if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens { - if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter { + if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived { baseTokens = baseTokens.Sub(dCachedCreationTokens) cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio) } else { diff --git a/service/text_quota_test.go b/service/text_quota_test.go index 734eacf9..e995de17 100644 --- a/service/text_quota_test.go +++ b/service/text_quota_test.go @@ -206,7 +206,7 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi require.Equal(t, 1624, summary.Quota) } -func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheTokensFromPrompt(t *testing.T) { +func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) { gin.SetMode(gin.TestMode) w := httptest.NewRecorder() ctx, _ := gin.CreateTestContext(w) @@ -236,13 +236,14 @@ func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheTokensFromPrompt summary := calculateTextQuotaSummary(ctx, relayInfo, usage) - // OpenRouter usage is already normalized. prompt_tokens should stay intact. - // quota = 2604 + 2432*0.1 + 383 = 3230.2 => 3230 + // OpenRouter OpenAI-format display keeps prompt_tokens as total input, + // but billing still separates normal input from cache read tokens. + // quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798 require.Equal(t, 2604, summary.PromptTokens) - require.Equal(t, 3230, summary.Quota) + require.Equal(t, 798, summary.Quota) } -func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheCreationTokensFromPrompt(t *testing.T) { +func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) { gin.SetMode(gin.TestMode) w := httptest.NewRecorder() ctx, _ := gin.CreateTestContext(w) @@ -271,10 +272,10 @@ func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheCreationTokensFr summary := calculateTextQuotaSummary(ctx, relayInfo, usage) - // OpenRouter usage is already normalized. prompt_tokens should stay intact. - // quota = 2604 + 100*1.25 + 383 = 3112 + // prompt_tokens is still logged as total input, but cache creation is billed separately. + // quota = (2604 - 100) + 100*1.25 + 383 = 3012 require.Equal(t, 2604, summary.PromptTokens) - require.Equal(t, 3112, summary.Quota) + require.Equal(t, 3012, summary.Quota) } func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) { From 926e1781dd87bd249218cd60c985c89ef4e48309 Mon Sep 17 00:00:00 2001 From: Seefs Date: Wed, 25 Mar 2026 13:49:21 +0800 Subject: [PATCH 3/3] fix: preserve cache usage in openai-to-claude response conversion --- service/convert.go | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/service/convert.go b/service/convert.go index 7b5f3946..59d4f8fe 100644 --- a/service/convert.go +++ b/service/convert.go @@ -616,10 +616,7 @@ func ResponseOpenAI2Claude(openAIResponse *dto.OpenAITextResponse, info *relayco } claudeResponse.Content = contents claudeResponse.StopReason = stopReason - claudeResponse.Usage = &dto.ClaudeUsage{ - InputTokens: openAIResponse.PromptTokens, - OutputTokens: openAIResponse.CompletionTokens, - } + claudeResponse.Usage = buildClaudeUsageFromOpenAIUsage(&openAIResponse.Usage) return claudeResponse }