package service

import (
	"net/http/httptest"
	"testing"
	"time"

	"github.com/QuantumNous/new-api/constant"
	"github.com/QuantumNous/new-api/dto"
	relaycommon "github.com/QuantumNous/new-api/relay/common"
	"github.com/QuantumNous/new-api/types"
	"github.com/gin-gonic/gin"
	"github.com/stretchr/testify/require"
)

// TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic feeds the same usage
// through two relay infos that differ only in RelayFormat (OpenAI vs Claude),
// both with FinalRequestRelayFormat set to Claude, and asserts that the quota
// and the 5m/1h cache-creation token counts are identical.
func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:           1,
		CompletionRatio:      2,
		CacheRatio:           0.1,
		CacheCreationRatio:   1.25,
		CacheCreation5mRatio: 1.25,
		CacheCreation1hRatio: 2,
		GroupRatioInfo:       types.GroupRatioInfo{GroupRatio: 1},
	}

	tokenUsage := &dto.Usage{
		PromptTokens:     1000,
		CompletionTokens: 200,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens:         100,
			CachedCreationTokens: 50,
		},
		ClaudeCacheCreation5mTokens: 10,
		ClaudeCacheCreation1hTokens: 20,
	}

	// Request arrived in OpenAI chat format but is sent upstream as Claude.
	viaChat := &relaycommon.RelayInfo{
		RelayFormat:             types.RelayFormatOpenAI,
		FinalRequestRelayFormat: types.RelayFormatClaude,
		OriginModelName:         "claude-3-7-sonnet",
		PriceData:               pricing,
		StartTime:               time.Now(),
	}
	// Request arrived in Claude messages format and is sent upstream as Claude.
	viaMessages := &relaycommon.RelayInfo{
		RelayFormat:             types.RelayFormatClaude,
		FinalRequestRelayFormat: types.RelayFormatClaude,
		OriginModelName:         "claude-3-7-sonnet",
		PriceData:               pricing,
		StartTime:               time.Now(),
	}

	chatSummary := calculateTextQuotaSummary(ginCtx, viaChat, tokenUsage)
	messageSummary := calculateTextQuotaSummary(ginCtx, viaMessages, tokenUsage)

	require.Equal(t, messageSummary.Quota, chatSummary.Quota)
	require.Equal(t, messageSummary.CacheCreationTokens5m, chatSummary.CacheCreationTokens5m)
	require.Equal(t, messageSummary.CacheCreationTokens1h, chatSummary.CacheCreationTokens1h)
	require.True(t, chatSummary.IsClaudeUsageSemantic)
	// NOTE(review): 1488 is consistent with
	// 1000 + 100*0.1 + 20*1.25 + 10*1.25 + 20*2 + 200*2 = 1487.5 rounded to
	// the nearest integer; confirm against calculateTextQuotaSummary.
	require.Equal(t, 1488, chatSummary.Quota)
}

// TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios uses distinct
// values for the aggregate, 5m and 1h cache-creation ratios and asserts the
// quota that results when each bucket is priced with its own ratio.
func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:           1,
		CompletionRatio:      1,
		CacheRatio:           0,
		CacheCreationRatio:   1,
		CacheCreation5mRatio: 2,
		CacheCreation1hRatio: 3,
		GroupRatioInfo:       types.GroupRatioInfo{GroupRatio: 1},
	}
	info := &relaycommon.RelayInfo{
		RelayFormat:             types.RelayFormatOpenAI,
		FinalRequestRelayFormat: types.RelayFormatClaude,
		OriginModelName:         "claude-3-7-sonnet",
		PriceData:               pricing,
		StartTime:               time.Now(),
	}

	tokenUsage := &dto.Usage{
		PromptTokens:     100,
		CompletionTokens: 0,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedCreationTokens: 10,
		},
		ClaudeCacheCreation5mTokens: 2,
		ClaudeCacheCreation1hTokens: 3,
	}

	got := calculateTextQuotaSummary(ginCtx, info, tokenUsage)

	// 100 + remaining(5)*1 + 2*2 + 3*3 = 118
	require.Equal(t, 118, got.Quota)
}

// TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage
// leaves FinalRequestRelayFormat unset and instead marks the usage itself with
// UsageSemantic "anthropic", then asserts that the summary is flagged as
// Claude-semantic and bills 1488.
func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:           1,
		CompletionRatio:      2,
		CacheRatio:           0.1,
		CacheCreationRatio:   1.25,
		CacheCreation5mRatio: 1.25,
		CacheCreation1hRatio: 2,
		GroupRatioInfo:       types.GroupRatioInfo{GroupRatio: 1},
	}
	info := &relaycommon.RelayInfo{
		RelayFormat:     types.RelayFormatOpenAI,
		OriginModelName: "claude-3-7-sonnet",
		PriceData:       pricing,
		StartTime:       time.Now(),
	}

	tokenUsage := &dto.Usage{
		PromptTokens:     1000,
		CompletionTokens: 200,
		UsageSemantic:    "anthropic",
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens:         100,
			CachedCreationTokens: 50,
		},
		ClaudeCacheCreation5mTokens: 10,
		ClaudeCacheCreation1hTokens: 20,
	}

	got := calculateTextQuotaSummary(ginCtx, info, tokenUsage)

	require.True(t, got.IsClaudeUsageSemantic)
	require.Equal(t, "anthropic", got.UsageSemantic)
	require.Equal(t, 1488, got.Quota)
}

// TestCacheWriteTokensTotal pins the value cacheWriteTokensTotal returns for
// three summaries: aggregate plus split counts, aggregate only, and split
// counts only.
func TestCacheWriteTokensTotal(t *testing.T) {
	cases := []struct {
		name    string
		summary textQuotaSummary
		want    int
	}{
		{
			name: "split cache creation",
			summary: textQuotaSummary{
				CacheCreationTokens:   50,
				CacheCreationTokens5m: 10,
				CacheCreationTokens1h: 20,
			},
			want: 50,
		},
		{
			name:    "legacy cache creation",
			summary: textQuotaSummary{CacheCreationTokens: 50},
			want:    50,
		},
		{
			name: "split cache creation without aggregate remainder",
			summary: textQuotaSummary{
				CacheCreationTokens5m: 10,
				CacheCreationTokens1h: 20,
			},
			want: 30,
		},
	}

	for _, tc := range cases {
		// Subtests run synchronously (no t.Parallel), so capturing tc is safe.
		t.Run(tc.name, func(t *testing.T) {
			require.Equal(t, tc.want, cacheWriteTokensTotal(tc.summary))
		})
	}
}

// TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage passes
// OpenAI-format usage whose cached token count (3544) exceeds its prompt token
// count (62), with no explicit usage semantic, and asserts the resulting quota.
func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:           1,
		CompletionRatio:      5,
		CacheRatio:           0.1,
		CacheCreationRatio:   1.25,
		CacheCreation5mRatio: 1.25,
		CacheCreation1hRatio: 2,
		GroupRatioInfo:       types.GroupRatioInfo{GroupRatio: 1},
	}
	info := &relaycommon.RelayInfo{
		RelayFormat:     types.RelayFormatOpenAI,
		OriginModelName: "claude-3-7-sonnet",
		PriceData:       pricing,
		StartTime:       time.Now(),
	}

	tokenUsage := &dto.Usage{
		PromptTokens:     62,
		CompletionTokens: 95,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens: 3544,
		},
		ClaudeCacheCreation5mTokens: 586,
	}

	got := calculateTextQuotaSummary(ginCtx, info, tokenUsage)

	// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1623.9 => 1624
	require.Equal(t, 1624, got.Quota)
}

// TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling
// uses an OpenRouter channel with an OpenAI model and asserts that the summary
// reports the full prompt token count while the quota prices cached reads
// separately.
func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:         1,
		CompletionRatio:    1,
		CacheRatio:         0.1,
		CacheCreationRatio: 1.25,
		GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
	}
	info := &relaycommon.RelayInfo{
		OriginModelName: "openai/gpt-4.1",
		ChannelMeta: &relaycommon.ChannelMeta{
			ChannelType: constant.ChannelTypeOpenRouter,
		},
		PriceData: pricing,
		StartTime: time.Now(),
	}

	tokenUsage := &dto.Usage{
		PromptTokens:     2604,
		CompletionTokens: 383,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens: 2432,
		},
	}

	got := calculateTextQuotaSummary(ginCtx, info, tokenUsage)

	// OpenRouter OpenAI-format display keeps prompt_tokens as total input,
	// but billing still separates normal input from cache read tokens.
	// quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798
	require.Equal(t, 2604, got.PromptTokens)
	require.Equal(t, 798, got.Quota)
}

// TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling
// uses an OpenRouter channel with an OpenAI model and asserts that the summary
// reports the full prompt token count while the quota prices cache-creation
// tokens separately.
func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:         1,
		CompletionRatio:    1,
		CacheCreationRatio: 1.25,
		GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
	}
	info := &relaycommon.RelayInfo{
		OriginModelName: "openai/gpt-4.1",
		ChannelMeta: &relaycommon.ChannelMeta{
			ChannelType: constant.ChannelTypeOpenRouter,
		},
		PriceData: pricing,
		StartTime: time.Now(),
	}

	tokenUsage := &dto.Usage{
		PromptTokens:     2604,
		CompletionTokens: 383,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedCreationTokens: 100,
		},
	}

	got := calculateTextQuotaSummary(ginCtx, info, tokenUsage)

	// prompt_tokens is still logged as total input, but cache creation is
	// billed separately.
	// quota = (2604 - 100) + 100*1.25 + 383 = 3012
	require.Equal(t, 2604, got.PromptTokens)
	require.Equal(t, 3012, got.Quota)
}

// TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling uses an
// OpenRouter channel whose final request format is Claude and asserts that the
// summary is Claude-semantic, reports prompt tokens net of cached reads, and
// bills 798.
func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ginCtx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:         1,
		CompletionRatio:    1,
		CacheRatio:         0.1,
		CacheCreationRatio: 1.25,
		GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
	}
	info := &relaycommon.RelayInfo{
		FinalRequestRelayFormat: types.RelayFormatClaude,
		OriginModelName:         "anthropic/claude-3.7-sonnet",
		ChannelMeta: &relaycommon.ChannelMeta{
			ChannelType: constant.ChannelTypeOpenRouter,
		},
		PriceData: pricing,
		StartTime: time.Now(),
	}

	tokenUsage := &dto.Usage{
		PromptTokens:     2604,
		CompletionTokens: 383,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens: 2432,
		},
	}

	got := calculateTextQuotaSummary(ginCtx, info, tokenUsage)

	// Pre-PR PostClaudeConsumeQuota behavior for OpenRouter:
	// prompt = 2604 - 2432 = 172
	// quota = 172 + 2432*0.1 + 383 = 798.2 => 798
	require.True(t, got.IsClaudeUsageSemantic)
	require.Equal(t, 172, got.PromptTokens)
	require.Equal(t, 798, got.Quota)
}