Files
new-api-oiss/service/text_quota_test.go

319 lines
8.9 KiB
Go

package service
import (
"net/http/httptest"
"testing"
"time"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/require"
)
func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(w)
usage := &dto.Usage{
PromptTokens: 1000,
CompletionTokens: 200,
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 100,
CachedCreationTokens: 50,
},
ClaudeCacheCreation5mTokens: 10,
ClaudeCacheCreation1hTokens: 20,
}
priceData := types.PriceData{
ModelRatio: 1,
CompletionRatio: 2,
CacheRatio: 0.1,
CacheCreationRatio: 1.25,
CacheCreation5mRatio: 1.25,
CacheCreation1hRatio: 2,
GroupRatioInfo: types.GroupRatioInfo{
GroupRatio: 1,
},
}
chatRelayInfo := &relaycommon.RelayInfo{
RelayFormat: types.RelayFormatOpenAI,
FinalRequestRelayFormat: types.RelayFormatClaude,
OriginModelName: "claude-3-7-sonnet",
PriceData: priceData,
StartTime: time.Now(),
}
messageRelayInfo := &relaycommon.RelayInfo{
RelayFormat: types.RelayFormatClaude,
FinalRequestRelayFormat: types.RelayFormatClaude,
OriginModelName: "claude-3-7-sonnet",
PriceData: priceData,
StartTime: time.Now(),
}
chatSummary := calculateTextQuotaSummary(ctx, chatRelayInfo, usage)
messageSummary := calculateTextQuotaSummary(ctx, messageRelayInfo, usage)
require.Equal(t, messageSummary.Quota, chatSummary.Quota)
require.Equal(t, messageSummary.CacheCreationTokens5m, chatSummary.CacheCreationTokens5m)
require.Equal(t, messageSummary.CacheCreationTokens1h, chatSummary.CacheCreationTokens1h)
require.True(t, chatSummary.IsClaudeUsageSemantic)
require.Equal(t, 1488, chatSummary.Quota)
}
func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(w)
relayInfo := &relaycommon.RelayInfo{
RelayFormat: types.RelayFormatOpenAI,
FinalRequestRelayFormat: types.RelayFormatClaude,
OriginModelName: "claude-3-7-sonnet",
PriceData: types.PriceData{
ModelRatio: 1,
CompletionRatio: 1,
CacheRatio: 0,
CacheCreationRatio: 1,
CacheCreation5mRatio: 2,
CacheCreation1hRatio: 3,
GroupRatioInfo: types.GroupRatioInfo{
GroupRatio: 1,
},
},
StartTime: time.Now(),
}
usage := &dto.Usage{
PromptTokens: 100,
CompletionTokens: 0,
PromptTokensDetails: dto.InputTokenDetails{
CachedCreationTokens: 10,
},
ClaudeCacheCreation5mTokens: 2,
ClaudeCacheCreation1hTokens: 3,
}
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
// 100 + remaining(5)*1 + 2*2 + 3*3 = 118
require.Equal(t, 118, summary.Quota)
}
func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(w)
relayInfo := &relaycommon.RelayInfo{
RelayFormat: types.RelayFormatOpenAI,
OriginModelName: "claude-3-7-sonnet",
PriceData: types.PriceData{
ModelRatio: 1,
CompletionRatio: 2,
CacheRatio: 0.1,
CacheCreationRatio: 1.25,
CacheCreation5mRatio: 1.25,
CacheCreation1hRatio: 2,
GroupRatioInfo: types.GroupRatioInfo{
GroupRatio: 1,
},
},
StartTime: time.Now(),
}
usage := &dto.Usage{
PromptTokens: 1000,
CompletionTokens: 200,
UsageSemantic: "anthropic",
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 100,
CachedCreationTokens: 50,
},
ClaudeCacheCreation5mTokens: 10,
ClaudeCacheCreation1hTokens: 20,
}
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
require.True(t, summary.IsClaudeUsageSemantic)
require.Equal(t, "anthropic", summary.UsageSemantic)
require.Equal(t, 1488, summary.Quota)
}
func TestCacheWriteTokensTotal(t *testing.T) {
t.Run("split cache creation", func(t *testing.T) {
summary := textQuotaSummary{
CacheCreationTokens: 50,
CacheCreationTokens5m: 10,
CacheCreationTokens1h: 20,
}
require.Equal(t, 50, cacheWriteTokensTotal(summary))
})
t.Run("legacy cache creation", func(t *testing.T) {
summary := textQuotaSummary{CacheCreationTokens: 50}
require.Equal(t, 50, cacheWriteTokensTotal(summary))
})
t.Run("split cache creation without aggregate remainder", func(t *testing.T) {
summary := textQuotaSummary{
CacheCreationTokens5m: 10,
CacheCreationTokens1h: 20,
}
require.Equal(t, 30, cacheWriteTokensTotal(summary))
})
}
func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(w)
relayInfo := &relaycommon.RelayInfo{
RelayFormat: types.RelayFormatOpenAI,
OriginModelName: "claude-3-7-sonnet",
PriceData: types.PriceData{
ModelRatio: 1,
CompletionRatio: 5,
CacheRatio: 0.1,
CacheCreationRatio: 1.25,
CacheCreation5mRatio: 1.25,
CacheCreation1hRatio: 2,
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
},
StartTime: time.Now(),
}
usage := &dto.Usage{
PromptTokens: 62,
CompletionTokens: 95,
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 3544,
},
ClaudeCacheCreation5mTokens: 586,
}
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
require.Equal(t, 1624, summary.Quota)
}
func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(w)
relayInfo := &relaycommon.RelayInfo{
OriginModelName: "openai/gpt-4.1",
ChannelMeta: &relaycommon.ChannelMeta{
ChannelType: constant.ChannelTypeOpenRouter,
},
PriceData: types.PriceData{
ModelRatio: 1,
CompletionRatio: 1,
CacheRatio: 0.1,
CacheCreationRatio: 1.25,
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
},
StartTime: time.Now(),
}
usage := &dto.Usage{
PromptTokens: 2604,
CompletionTokens: 383,
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 2432,
},
}
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
// OpenRouter OpenAI-format display keeps prompt_tokens as total input,
// but billing still separates normal input from cache read tokens.
// quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798
require.Equal(t, 2604, summary.PromptTokens)
require.Equal(t, 798, summary.Quota)
}
func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(w)
relayInfo := &relaycommon.RelayInfo{
OriginModelName: "openai/gpt-4.1",
ChannelMeta: &relaycommon.ChannelMeta{
ChannelType: constant.ChannelTypeOpenRouter,
},
PriceData: types.PriceData{
ModelRatio: 1,
CompletionRatio: 1,
CacheCreationRatio: 1.25,
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
},
StartTime: time.Now(),
}
usage := &dto.Usage{
PromptTokens: 2604,
CompletionTokens: 383,
PromptTokensDetails: dto.InputTokenDetails{
CachedCreationTokens: 100,
},
}
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
// prompt_tokens is still logged as total input, but cache creation is billed separately.
// quota = (2604 - 100) + 100*1.25 + 383 = 3012
require.Equal(t, 2604, summary.PromptTokens)
require.Equal(t, 3012, summary.Quota)
}
func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
gin.SetMode(gin.TestMode)
w := httptest.NewRecorder()
ctx, _ := gin.CreateTestContext(w)
relayInfo := &relaycommon.RelayInfo{
FinalRequestRelayFormat: types.RelayFormatClaude,
OriginModelName: "anthropic/claude-3.7-sonnet",
ChannelMeta: &relaycommon.ChannelMeta{
ChannelType: constant.ChannelTypeOpenRouter,
},
PriceData: types.PriceData{
ModelRatio: 1,
CompletionRatio: 1,
CacheRatio: 0.1,
CacheCreationRatio: 1.25,
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
},
StartTime: time.Now(),
}
usage := &dto.Usage{
PromptTokens: 2604,
CompletionTokens: 383,
PromptTokensDetails: dto.InputTokenDetails{
CachedTokens: 2432,
},
}
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
// Pre-PR PostClaudeConsumeQuota behavior for OpenRouter:
// prompt = 2604 - 2432 = 172
// quota = 172 + 2432*0.1 + 383 = 798.2 => 798
require.True(t, summary.IsClaudeUsageSemantic)
require.Equal(t, 172, summary.PromptTokens)
require.Equal(t, 798, summary.Quota)
}