Merge pull request #3438 from seefs001/fix/openrouter-usage
fix: restore pre-3400 OpenRouter billing semantics
This commit is contained in:
@@ -616,10 +616,7 @@ func ResponseOpenAI2Claude(openAIResponse *dto.OpenAITextResponse, info *relayco
|
||||
}
|
||||
claudeResponse.Content = contents
|
||||
claudeResponse.StopReason = stopReason
|
||||
claudeResponse.Usage = &dto.ClaudeUsage{
|
||||
InputTokens: openAIResponse.PromptTokens,
|
||||
OutputTokens: openAIResponse.CompletionTokens,
|
||||
}
|
||||
claudeResponse.Usage = buildClaudeUsageFromOpenAIUsage(&openAIResponse.Usage)
|
||||
|
||||
return claudeResponse
|
||||
}
|
||||
|
||||
@@ -113,8 +113,11 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
|
||||
summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
|
||||
summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
|
||||
legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
|
||||
isOpenRouterClaudeBilling := relayInfo.ChannelMeta != nil &&
|
||||
relayInfo.ChannelType == constant.ChannelTypeOpenRouter &&
|
||||
summary.IsClaudeUsageSemantic
|
||||
|
||||
if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
|
||||
if isOpenRouterClaudeBilling {
|
||||
summary.PromptTokens -= summary.CacheTokens
|
||||
isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
|
||||
if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/QuantumNous/new-api/constant"
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
"github.com/QuantumNous/new-api/types"
|
||||
@@ -204,3 +205,114 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi
|
||||
// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1623.9 => 1624
|
||||
require.Equal(t, 1624, summary.Quota)
|
||||
}
|
||||
|
||||
func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(w)
|
||||
|
||||
relayInfo := &relaycommon.RelayInfo{
|
||||
OriginModelName: "openai/gpt-4.1",
|
||||
ChannelMeta: &relaycommon.ChannelMeta{
|
||||
ChannelType: constant.ChannelTypeOpenRouter,
|
||||
},
|
||||
PriceData: types.PriceData{
|
||||
ModelRatio: 1,
|
||||
CompletionRatio: 1,
|
||||
CacheRatio: 0.1,
|
||||
CacheCreationRatio: 1.25,
|
||||
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
|
||||
},
|
||||
StartTime: time.Now(),
|
||||
}
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 2604,
|
||||
CompletionTokens: 383,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 2432,
|
||||
},
|
||||
}
|
||||
|
||||
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||
|
||||
// OpenRouter OpenAI-format display keeps prompt_tokens as total input,
|
||||
// but billing still separates normal input from cache read tokens.
|
||||
// quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798
|
||||
require.Equal(t, 2604, summary.PromptTokens)
|
||||
require.Equal(t, 798, summary.Quota)
|
||||
}
|
||||
|
||||
func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(w)
|
||||
|
||||
relayInfo := &relaycommon.RelayInfo{
|
||||
OriginModelName: "openai/gpt-4.1",
|
||||
ChannelMeta: &relaycommon.ChannelMeta{
|
||||
ChannelType: constant.ChannelTypeOpenRouter,
|
||||
},
|
||||
PriceData: types.PriceData{
|
||||
ModelRatio: 1,
|
||||
CompletionRatio: 1,
|
||||
CacheCreationRatio: 1.25,
|
||||
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
|
||||
},
|
||||
StartTime: time.Now(),
|
||||
}
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 2604,
|
||||
CompletionTokens: 383,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedCreationTokens: 100,
|
||||
},
|
||||
}
|
||||
|
||||
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||
|
||||
// prompt_tokens is still logged as total input, but cache creation is billed separately.
|
||||
// quota = (2604 - 100) + 100*1.25 + 383 = 3012
|
||||
require.Equal(t, 2604, summary.PromptTokens)
|
||||
require.Equal(t, 3012, summary.Quota)
|
||||
}
|
||||
|
||||
func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(w)
|
||||
|
||||
relayInfo := &relaycommon.RelayInfo{
|
||||
FinalRequestRelayFormat: types.RelayFormatClaude,
|
||||
OriginModelName: "anthropic/claude-3.7-sonnet",
|
||||
ChannelMeta: &relaycommon.ChannelMeta{
|
||||
ChannelType: constant.ChannelTypeOpenRouter,
|
||||
},
|
||||
PriceData: types.PriceData{
|
||||
ModelRatio: 1,
|
||||
CompletionRatio: 1,
|
||||
CacheRatio: 0.1,
|
||||
CacheCreationRatio: 1.25,
|
||||
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
|
||||
},
|
||||
StartTime: time.Now(),
|
||||
}
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 2604,
|
||||
CompletionTokens: 383,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 2432,
|
||||
},
|
||||
}
|
||||
|
||||
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||
|
||||
// Pre-PR PostClaudeConsumeQuota behavior for OpenRouter:
|
||||
// prompt = 2604 - 2432 = 172
|
||||
// quota = 172 + 2432*0.1 + 383 = 798.2 => 798
|
||||
require.True(t, summary.IsClaudeUsageSemantic)
|
||||
require.Equal(t, 172, summary.PromptTokens)
|
||||
require.Equal(t, 798, summary.Quota)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user