diff --git a/service/convert.go b/service/convert.go
index 7b5f3946..59d4f8fe 100644
--- a/service/convert.go
+++ b/service/convert.go
@@ -616,10 +616,7 @@ func ResponseOpenAI2Claude(openAIResponse *dto.OpenAITextResponse, info *relayco
 	}
 	claudeResponse.Content = contents
 	claudeResponse.StopReason = stopReason
-	claudeResponse.Usage = &dto.ClaudeUsage{
-		InputTokens:  openAIResponse.PromptTokens,
-		OutputTokens: openAIResponse.CompletionTokens,
-	}
+	claudeResponse.Usage = buildClaudeUsageFromOpenAIUsage(&openAIResponse.Usage)
 
 	return claudeResponse
 }
diff --git a/service/text_quota.go b/service/text_quota.go
index a300097e..8caee8f2 100644
--- a/service/text_quota.go
+++ b/service/text_quota.go
@@ -113,8 +113,11 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 	summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
 	summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
 	legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
+	isOpenRouterClaudeBilling := relayInfo.ChannelMeta != nil &&
+		relayInfo.ChannelType == constant.ChannelTypeOpenRouter &&
+		summary.IsClaudeUsageSemantic
 
-	if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
+	if isOpenRouterClaudeBilling {
 		summary.PromptTokens -= summary.CacheTokens
 		isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
 		if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
diff --git a/service/text_quota_test.go b/service/text_quota_test.go
index 4370b16e..e995de17 100644
--- a/service/text_quota_test.go
+++ b/service/text_quota_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/types"
@@ -204,3 +205,114 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi
 	// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
 	require.Equal(t, 1624, summary.Quota)
 }
+
+func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	w := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(w)
+
+	relayInfo := &relaycommon.RelayInfo{
+		OriginModelName: "openai/gpt-4.1",
+		ChannelMeta: &relaycommon.ChannelMeta{
+			ChannelType: constant.ChannelTypeOpenRouter,
+		},
+		PriceData: types.PriceData{
+			ModelRatio:         1,
+			CompletionRatio:    1,
+			CacheRatio:         0.1,
+			CacheCreationRatio: 1.25,
+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
+		},
+		StartTime: time.Now(),
+	}
+
+	usage := &dto.Usage{
+		PromptTokens:     2604,
+		CompletionTokens: 383,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens: 2432,
+		},
+	}
+
+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
+
+	// OpenRouter OpenAI-format display keeps prompt_tokens as total input,
+	// but billing still separates normal input from cache read tokens.
+	// quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798
+	require.Equal(t, 2604, summary.PromptTokens)
+	require.Equal(t, 798, summary.Quota)
+}
+
+func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	w := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(w)
+
+	relayInfo := &relaycommon.RelayInfo{
+		OriginModelName: "openai/gpt-4.1",
+		ChannelMeta: &relaycommon.ChannelMeta{
+			ChannelType: constant.ChannelTypeOpenRouter,
+		},
+		PriceData: types.PriceData{
+			ModelRatio:         1,
+			CompletionRatio:    1,
+			CacheCreationRatio: 1.25,
+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
+		},
+		StartTime: time.Now(),
+	}
+
+	usage := &dto.Usage{
+		PromptTokens:     2604,
+		CompletionTokens: 383,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedCreationTokens: 100,
+		},
+	}
+
+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
+
+	// prompt_tokens is still logged as total input, but cache creation is billed separately.
+	// quota = (2604 - 100) + 100*1.25 + 383 = 3012
+	require.Equal(t, 2604, summary.PromptTokens)
+	require.Equal(t, 3012, summary.Quota)
+}
+
+func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	w := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(w)
+
+	relayInfo := &relaycommon.RelayInfo{
+		FinalRequestRelayFormat: types.RelayFormatClaude,
+		OriginModelName:         "anthropic/claude-3.7-sonnet",
+		ChannelMeta: &relaycommon.ChannelMeta{
+			ChannelType: constant.ChannelTypeOpenRouter,
+		},
+		PriceData: types.PriceData{
+			ModelRatio:         1,
+			CompletionRatio:    1,
+			CacheRatio:         0.1,
+			CacheCreationRatio: 1.25,
+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
+		},
+		StartTime: time.Now(),
+	}
+
+	usage := &dto.Usage{
+		PromptTokens:     2604,
+		CompletionTokens: 383,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens: 2432,
+		},
+	}
+
+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
+
+	// Pre-PR PostClaudeConsumeQuota behavior for OpenRouter:
+	// prompt = 2604 - 2432 = 172
+	// quota = 172 + 2432*0.1 + 383 = 798.2 => 798
+	require.True(t, summary.IsClaudeUsageSemantic)
+	require.Equal(t, 172, summary.PromptTokens)
+	require.Equal(t, 798, summary.Quota)
+}