From 9ecad90652ed94159a0e7f454f426fa435f0b946 Mon Sep 17 00:00:00 2001 From: Seefs Date: Mon, 23 Mar 2026 14:22:12 +0800 Subject: [PATCH] refactor: optimize billing flow for OpenAI-to-Anthropic convert --- dto/openai_response.go | 12 +- relay/audio_handler.go | 2 +- relay/channel/claude/relay-claude.go | 40 +- relay/channel/claude/relay_claude_test.go | 82 +++++ relay/claude_handler.go | 4 +- relay/compatible_handler.go | 295 +-------------- relay/embedding_handler.go | 2 +- relay/gemini_handler.go | 4 +- relay/image_handler.go | 2 +- relay/rerank_handler.go | 2 +- relay/responses_handler.go | 4 +- service/convert.go | 46 +-- service/log_info_generate.go | 12 + service/quota.go | 102 ------ service/text_quota.go | 427 ++++++++++++++++++++++ service/text_quota_test.go | 206 +++++++++++ 16 files changed, 809 insertions(+), 433 deletions(-) create mode 100644 service/text_quota.go create mode 100644 service/text_quota_test.go diff --git a/dto/openai_response.go b/dto/openai_response.go index 1378c4f6..b5bc7b0d 100644 --- a/dto/openai_response.go +++ b/dto/openai_response.go @@ -220,10 +220,12 @@ type CompletionsStreamResponse struct { } type Usage struct { - PromptTokens int `json:"prompt_tokens"` - CompletionTokens int `json:"completion_tokens"` - TotalTokens int `json:"total_tokens"` - PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"` + PromptTokens int `json:"prompt_tokens"` + CompletionTokens int `json:"completion_tokens"` + TotalTokens int `json:"total_tokens"` + PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"` + UsageSemantic string `json:"usage_semantic,omitempty"` + UsageSource string `json:"usage_source,omitempty"` PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"` CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"` @@ -251,7 +253,7 @@ type OpenAIVideoResponse struct { type InputTokenDetails struct { CachedTokens int `json:"cached_tokens"` - CachedCreationTokens int `json:"-"` + 
CachedCreationTokens int `json:"cached_creation_tokens,omitempty"` TextTokens int `json:"text_tokens"` AudioTokens int `json:"audio_tokens"` ImageTokens int `json:"image_tokens"` diff --git a/relay/audio_handler.go b/relay/audio_handler.go index 5c34b792..7d2a4f22 100644 --- a/relay/audio_handler.go +++ b/relay/audio_handler.go @@ -70,7 +70,7 @@ func AudioHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type if usage.(*dto.Usage).CompletionTokenDetails.AudioTokens > 0 || usage.(*dto.Usage).PromptTokensDetails.AudioTokens > 0 { service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "") } else { - postConsumeQuota(c, info, usage.(*dto.Usage)) + service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil) } return nil diff --git a/relay/channel/claude/relay-claude.go b/relay/channel/claude/relay-claude.go index 0636ecd4..63e8c464 100644 --- a/relay/channel/claude/relay-claude.go +++ b/relay/channel/claude/relay-claude.go @@ -555,6 +555,35 @@ type ClaudeResponseInfo struct { Done bool } +func cacheCreationTokensForOpenAIUsage(usage *dto.Usage) int { + if usage == nil { + return 0 + } + splitCacheCreationTokens := usage.ClaudeCacheCreation5mTokens + usage.ClaudeCacheCreation1hTokens + if splitCacheCreationTokens == 0 { + return usage.PromptTokensDetails.CachedCreationTokens + } + if usage.PromptTokensDetails.CachedCreationTokens > splitCacheCreationTokens { + return usage.PromptTokensDetails.CachedCreationTokens + } + return splitCacheCreationTokens +} + +func buildOpenAIStyleUsageFromClaudeUsage(usage *dto.Usage) dto.Usage { + if usage == nil { + return dto.Usage{} + } + clone := *usage + cacheCreationTokens := cacheCreationTokensForOpenAIUsage(usage) + totalInputTokens := usage.PromptTokens + usage.PromptTokensDetails.CachedTokens + cacheCreationTokens + clone.PromptTokens = totalInputTokens + clone.InputTokens = totalInputTokens + clone.TotalTokens = totalInputTokens + usage.CompletionTokens + clone.UsageSemantic = "openai" + clone.UsageSource 
= "anthropic" + return clone +} + func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo *ClaudeResponseInfo) *dto.ClaudeUsage { usage := &dto.ClaudeUsage{} if claudeResponse != nil && claudeResponse.Usage != nil { @@ -643,6 +672,7 @@ func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *d // message_start, 获取usage if claudeResponse.Message != nil && claudeResponse.Message.Usage != nil { claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens + claudeInfo.Usage.UsageSemantic = "anthropic" claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens() @@ -661,6 +691,7 @@ func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *d } else if claudeResponse.Type == "message_delta" { // 最终的usage获取 if claudeResponse.Usage != nil { + claudeInfo.Usage.UsageSemantic = "anthropic" if claudeResponse.Usage.InputTokens > 0 { // 不叠加,只取最新的 claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens @@ -754,12 +785,16 @@ func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, clau } claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens) } + if claudeInfo.Usage != nil { + claudeInfo.Usage.UsageSemantic = "anthropic" + } if info.RelayFormat == types.RelayFormatClaude { // } else if info.RelayFormat == types.RelayFormatOpenAI { if info.ShouldIncludeUsage { - response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, claudeInfo.Created, info.UpstreamModelName, *claudeInfo.Usage) + openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(claudeInfo.Usage) + response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, 
claudeInfo.Created, info.UpstreamModelName, openAIUsage) err := helper.ObjectData(c, response) if err != nil { common.SysLog("send final response failed: " + err.Error()) @@ -810,6 +845,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens + claudeInfo.Usage.UsageSemantic = "anthropic" claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens() @@ -819,7 +855,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud switch info.RelayFormat { case types.RelayFormatOpenAI: openaiResponse := ResponseClaude2OpenAI(&claudeResponse) - openaiResponse.Usage = *claudeInfo.Usage + openaiResponse.Usage = buildOpenAIStyleUsageFromClaudeUsage(claudeInfo.Usage) responseData, err = json.Marshal(openaiResponse) if err != nil { return types.NewError(err, types.ErrorCodeBadResponseBody) diff --git a/relay/channel/claude/relay_claude_test.go b/relay/channel/claude/relay_claude_test.go index e34c861a..4e4004d8 100644 --- a/relay/channel/claude/relay_claude_test.go +++ b/relay/channel/claude/relay_claude_test.go @@ -173,3 +173,85 @@ func TestFormatClaudeResponseInfo_ContentBlockDelta(t *testing.T) { t.Errorf("ResponseText = %q, want %q", claudeInfo.ResponseText.String(), "hello") } } + +func TestBuildOpenAIStyleUsageFromClaudeUsage(t *testing.T) { + usage := &dto.Usage{ + PromptTokens: 100, + CompletionTokens: 20, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 30, + CachedCreationTokens: 50, + }, + ClaudeCacheCreation5mTokens: 10, + 
ClaudeCacheCreation1hTokens: 20, + UsageSemantic: "anthropic", + } + + openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(usage) + + if openAIUsage.PromptTokens != 180 { + t.Fatalf("PromptTokens = %d, want 180", openAIUsage.PromptTokens) + } + if openAIUsage.InputTokens != 180 { + t.Fatalf("InputTokens = %d, want 180", openAIUsage.InputTokens) + } + if openAIUsage.TotalTokens != 200 { + t.Fatalf("TotalTokens = %d, want 200", openAIUsage.TotalTokens) + } + if openAIUsage.UsageSemantic != "openai" { + t.Fatalf("UsageSemantic = %s, want openai", openAIUsage.UsageSemantic) + } + if openAIUsage.UsageSource != "anthropic" { + t.Fatalf("UsageSource = %s, want anthropic", openAIUsage.UsageSource) + } +} + +func TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder(t *testing.T) { + tests := []struct { + name string + cachedCreationTokens int + cacheCreationTokens5m int + cacheCreationTokens1h int + expectedTotalInputToken int + }{ + { + name: "prefers aggregate when it includes remainder", + cachedCreationTokens: 50, + cacheCreationTokens5m: 10, + cacheCreationTokens1h: 20, + expectedTotalInputToken: 180, + }, + { + name: "falls back to split tokens when aggregate missing", + cachedCreationTokens: 0, + cacheCreationTokens5m: 10, + cacheCreationTokens1h: 20, + expectedTotalInputToken: 160, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + usage := &dto.Usage{ + PromptTokens: 100, + CompletionTokens: 20, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 30, + CachedCreationTokens: tt.cachedCreationTokens, + }, + ClaudeCacheCreation5mTokens: tt.cacheCreationTokens5m, + ClaudeCacheCreation1hTokens: tt.cacheCreationTokens1h, + UsageSemantic: "anthropic", + } + + openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(usage) + + if openAIUsage.PromptTokens != tt.expectedTotalInputToken { + t.Fatalf("PromptTokens = %d, want %d", openAIUsage.PromptTokens, tt.expectedTotalInputToken) + } + if openAIUsage.InputTokens != 
tt.expectedTotalInputToken { + t.Fatalf("InputTokens = %d, want %d", openAIUsage.InputTokens, tt.expectedTotalInputToken) + } + }) + } +} diff --git a/relay/claude_handler.go b/relay/claude_handler.go index dbdb3663..dc4c93f8 100644 --- a/relay/claude_handler.go +++ b/relay/claude_handler.go @@ -122,7 +122,7 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ return newApiErr } - service.PostClaudeConsumeQuota(c, info, usage) + service.PostTextConsumeQuota(c, info, usage, nil) return nil } @@ -190,6 +190,6 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ return newAPIError } - service.PostClaudeConsumeQuota(c, info, usage.(*dto.Usage)) + service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil) return nil } diff --git a/relay/compatible_handler.go b/relay/compatible_handler.go index f60a485b..7a5624eb 100644 --- a/relay/compatible_handler.go +++ b/relay/compatible_handler.go @@ -6,25 +6,20 @@ import ( "io" "net/http" "strings" - "time" "github.com/QuantumNous/new-api/common" "github.com/QuantumNous/new-api/constant" "github.com/QuantumNous/new-api/dto" "github.com/QuantumNous/new-api/logger" - "github.com/QuantumNous/new-api/model" relaycommon "github.com/QuantumNous/new-api/relay/common" relayconstant "github.com/QuantumNous/new-api/relay/constant" "github.com/QuantumNous/new-api/relay/helper" "github.com/QuantumNous/new-api/service" "github.com/QuantumNous/new-api/setting/model_setting" - "github.com/QuantumNous/new-api/setting/operation_setting" "github.com/QuantumNous/new-api/setting/ratio_setting" "github.com/QuantumNous/new-api/types" "github.com/samber/lo" - "github.com/shopspring/decimal" - "github.com/gin-gonic/gin" ) @@ -93,7 +88,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types if containAudioTokens && containsAudioRatios { service.PostAudioConsumeQuota(c, info, usage, "") } else { - postConsumeQuota(c, info, usage) + 
service.PostTextConsumeQuota(c, info, usage, nil) } return nil } @@ -216,293 +211,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types if containAudioTokens && containsAudioRatios { service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "") } else { - postConsumeQuota(c, info, usage.(*dto.Usage)) + service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil) } return nil } - -func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent ...string) { - originUsage := usage - if usage == nil { - usage = &dto.Usage{ - PromptTokens: relayInfo.GetEstimatePromptTokens(), - CompletionTokens: 0, - TotalTokens: relayInfo.GetEstimatePromptTokens(), - } - extraContent = append(extraContent, "上游无计费信息") - } - - if originUsage != nil { - service.ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat()) - } - - adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason) - - useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix() - promptTokens := usage.PromptTokens - cacheTokens := usage.PromptTokensDetails.CachedTokens - imageTokens := usage.PromptTokensDetails.ImageTokens - audioTokens := usage.PromptTokensDetails.AudioTokens - completionTokens := usage.CompletionTokens - cachedCreationTokens := usage.PromptTokensDetails.CachedCreationTokens - - modelName := relayInfo.OriginModelName - - tokenName := ctx.GetString("token_name") - completionRatio := relayInfo.PriceData.CompletionRatio - cacheRatio := relayInfo.PriceData.CacheRatio - imageRatio := relayInfo.PriceData.ImageRatio - modelRatio := relayInfo.PriceData.ModelRatio - groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio - modelPrice := relayInfo.PriceData.ModelPrice - cachedCreationRatio := relayInfo.PriceData.CacheCreationRatio - - // Convert values to decimal for precise calculation - dPromptTokens := decimal.NewFromInt(int64(promptTokens)) - 
dCacheTokens := decimal.NewFromInt(int64(cacheTokens)) - dImageTokens := decimal.NewFromInt(int64(imageTokens)) - dAudioTokens := decimal.NewFromInt(int64(audioTokens)) - dCompletionTokens := decimal.NewFromInt(int64(completionTokens)) - dCachedCreationTokens := decimal.NewFromInt(int64(cachedCreationTokens)) - dCompletionRatio := decimal.NewFromFloat(completionRatio) - dCacheRatio := decimal.NewFromFloat(cacheRatio) - dImageRatio := decimal.NewFromFloat(imageRatio) - dModelRatio := decimal.NewFromFloat(modelRatio) - dGroupRatio := decimal.NewFromFloat(groupRatio) - dModelPrice := decimal.NewFromFloat(modelPrice) - dCachedCreationRatio := decimal.NewFromFloat(cachedCreationRatio) - dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit) - - ratio := dModelRatio.Mul(dGroupRatio) - - // openai web search 工具计费 - var dWebSearchQuota decimal.Decimal - var webSearchPrice float64 - // response api 格式工具计费 - if relayInfo.ResponsesUsageInfo != nil { - if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 { - // 计算 web search 调用的配额 (配额 = 价格 * 调用次数 / 1000 * 分组倍率) - webSearchPrice = operation_setting.GetWebSearchPricePerThousand(modelName, webSearchTool.SearchContextSize) - dWebSearchQuota = decimal.NewFromFloat(webSearchPrice). - Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))). 
- Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit) - extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次,上下文大小 %s,调用花费 %s", - webSearchTool.CallCount, webSearchTool.SearchContextSize, dWebSearchQuota.String())) - } - } else if strings.HasSuffix(modelName, "search-preview") { - // search-preview 模型不支持 response api - searchContextSize := ctx.GetString("chat_completion_web_search_context_size") - if searchContextSize == "" { - searchContextSize = "medium" - } - webSearchPrice = operation_setting.GetWebSearchPricePerThousand(modelName, searchContextSize) - dWebSearchQuota = decimal.NewFromFloat(webSearchPrice). - Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit) - extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 1 次,上下文大小 %s,调用花费 %s", - searchContextSize, dWebSearchQuota.String())) - } - // claude web search tool 计费 - var dClaudeWebSearchQuota decimal.Decimal - var claudeWebSearchPrice float64 - claudeWebSearchCallCount := ctx.GetInt("claude_web_search_requests") - if claudeWebSearchCallCount > 0 { - claudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand() - dClaudeWebSearchQuota = decimal.NewFromFloat(claudeWebSearchPrice). - Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit).Mul(decimal.NewFromInt(int64(claudeWebSearchCallCount))) - extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次,调用花费 %s", - claudeWebSearchCallCount, dClaudeWebSearchQuota.String())) - } - // file search tool 计费 - var dFileSearchQuota decimal.Decimal - var fileSearchPrice float64 - if relayInfo.ResponsesUsageInfo != nil { - if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 { - fileSearchPrice = operation_setting.GetFileSearchPricePerThousand() - dFileSearchQuota = decimal.NewFromFloat(fileSearchPrice). - Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))). 
- Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit) - extraContent = append(extraContent, fmt.Sprintf("File Search 调用 %d 次,调用花费 %s", - fileSearchTool.CallCount, dFileSearchQuota.String())) - } - } - var dImageGenerationCallQuota decimal.Decimal - var imageGenerationCallPrice float64 - if ctx.GetBool("image_generation_call") { - imageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size")) - dImageGenerationCallQuota = decimal.NewFromFloat(imageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit) - extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", dImageGenerationCallQuota.String())) - } - - var quotaCalculateDecimal decimal.Decimal - - var audioInputQuota decimal.Decimal - var audioInputPrice float64 - isClaudeUsageSemantic := relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude - if !relayInfo.PriceData.UsePrice { - baseTokens := dPromptTokens - // 减去 cached tokens - // Anthropic API 的 input_tokens 已经不包含缓存 tokens,不需要减去 - // OpenAI/OpenRouter 等 API 的 prompt_tokens 包含缓存 tokens,需要减去 - var cachedTokensWithRatio decimal.Decimal - if !dCacheTokens.IsZero() { - if !isClaudeUsageSemantic { - baseTokens = baseTokens.Sub(dCacheTokens) - } - cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio) - } - var dCachedCreationTokensWithRatio decimal.Decimal - if !dCachedCreationTokens.IsZero() { - if !isClaudeUsageSemantic { - baseTokens = baseTokens.Sub(dCachedCreationTokens) - } - dCachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCachedCreationRatio) - } - - // 减去 image tokens - var imageTokensWithRatio decimal.Decimal - if !dImageTokens.IsZero() { - baseTokens = baseTokens.Sub(dImageTokens) - imageTokensWithRatio = dImageTokens.Mul(dImageRatio) - } - - // 减去 Gemini audio tokens - if !dAudioTokens.IsZero() { - audioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(modelName) - if 
audioInputPrice > 0 { - // 重新计算 base tokens - baseTokens = baseTokens.Sub(dAudioTokens) - audioInputQuota = decimal.NewFromFloat(audioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit) - extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", audioInputQuota.String())) - } - } - promptQuota := baseTokens.Add(cachedTokensWithRatio). - Add(imageTokensWithRatio). - Add(dCachedCreationTokensWithRatio) - - completionQuota := dCompletionTokens.Mul(dCompletionRatio) - - quotaCalculateDecimal = promptQuota.Add(completionQuota).Mul(ratio) - - if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) { - quotaCalculateDecimal = decimal.NewFromInt(1) - } - } else { - quotaCalculateDecimal = dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio) - } - // 添加 responses tools call 调用的配额 - quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota) - quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota) - // 添加 audio input 独立计费 - quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota) - // 添加 image generation call 计费 - quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota) - - if len(relayInfo.PriceData.OtherRatios) > 0 { - for key, otherRatio := range relayInfo.PriceData.OtherRatios { - dOtherRatio := decimal.NewFromFloat(otherRatio) - quotaCalculateDecimal = quotaCalculateDecimal.Mul(dOtherRatio) - extraContent = append(extraContent, fmt.Sprintf("其他倍率 %s: %f", key, otherRatio)) - } - } - - quota := int(quotaCalculateDecimal.Round(0).IntPart()) - totalTokens := promptTokens + completionTokens - - //var logContent string - - // record all the consume log even if quota is 0 - if totalTokens == 0 { - // in this case, must be some error happened - // we cannot just return, because we may have to return the pre-consumed quota - quota = 0 - extraContent = append(extraContent, "上游没有返回计费信息,无法扣费(可能是上游超时)") - logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot 
consume quota, userId %d, channelId %d, "+ - "tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota)) - } else { - if !ratio.IsZero() && quota == 0 { - quota = 1 - } - model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota) - model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota) - } - - if err := service.SettleBilling(ctx, relayInfo, quota); err != nil { - logger.LogError(ctx, "error settling billing: "+err.Error()) - } - - logModel := modelName - if strings.HasPrefix(logModel, "gpt-4-gizmo") { - logModel = "gpt-4-gizmo-*" - extraContent = append(extraContent, fmt.Sprintf("模型 %s", modelName)) - } - if strings.HasPrefix(logModel, "gpt-4o-gizmo") { - logModel = "gpt-4o-gizmo-*" - extraContent = append(extraContent, fmt.Sprintf("模型 %s", modelName)) - } - logContent := strings.Join(extraContent, ", ") - other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio) - if adminRejectReason != "" { - other["reject_reason"] = adminRejectReason - } - // For chat-based calls to the Claude model, tagging is required. Using Claude's rendering logs, the two approaches handle input rendering differently. 
- if isClaudeUsageSemantic { - other["claude"] = true - other["usage_semantic"] = "anthropic" - } - if imageTokens != 0 { - other["image"] = true - other["image_ratio"] = imageRatio - other["image_output"] = imageTokens - } - if cachedCreationTokens != 0 { - other["cache_creation_tokens"] = cachedCreationTokens - other["cache_creation_ratio"] = cachedCreationRatio - } - if !dWebSearchQuota.IsZero() { - if relayInfo.ResponsesUsageInfo != nil { - if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists { - other["web_search"] = true - other["web_search_call_count"] = webSearchTool.CallCount - other["web_search_price"] = webSearchPrice - } - } else if strings.HasSuffix(modelName, "search-preview") { - other["web_search"] = true - other["web_search_call_count"] = 1 - other["web_search_price"] = webSearchPrice - } - } else if !dClaudeWebSearchQuota.IsZero() { - other["web_search"] = true - other["web_search_call_count"] = claudeWebSearchCallCount - other["web_search_price"] = claudeWebSearchPrice - } - if !dFileSearchQuota.IsZero() && relayInfo.ResponsesUsageInfo != nil { - if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists { - other["file_search"] = true - other["file_search_call_count"] = fileSearchTool.CallCount - other["file_search_price"] = fileSearchPrice - } - } - if !audioInputQuota.IsZero() { - other["audio_input_seperate_price"] = true - other["audio_input_token_count"] = audioTokens - other["audio_input_price"] = audioInputPrice - } - if !dImageGenerationCallQuota.IsZero() { - other["image_generation_call"] = true - other["image_generation_call_price"] = imageGenerationCallPrice - } - model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{ - ChannelId: relayInfo.ChannelId, - PromptTokens: promptTokens, - CompletionTokens: completionTokens, - ModelName: logModel, - TokenName: tokenName, - Quota: quota, - Content: logContent, - 
TokenId: relayInfo.TokenId, - UseTimeSeconds: int(useTimeSeconds), - IsStream: relayInfo.IsStream, - Group: relayInfo.UsingGroup, - Other: other, - }) -} diff --git a/relay/embedding_handler.go b/relay/embedding_handler.go index d8ca4223..393c0d72 100644 --- a/relay/embedding_handler.go +++ b/relay/embedding_handler.go @@ -82,6 +82,6 @@ func EmbeddingHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError * service.ResetStatusCode(newAPIError, statusCodeMappingStr) return newAPIError } - postConsumeQuota(c, info, usage.(*dto.Usage)) + service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil) return nil } diff --git a/relay/gemini_handler.go b/relay/gemini_handler.go index 39bd44e6..e663a28b 100644 --- a/relay/gemini_handler.go +++ b/relay/gemini_handler.go @@ -194,7 +194,7 @@ func GeminiHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ return openaiErr } - postConsumeQuota(c, info, usage.(*dto.Usage)) + service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil) return nil } @@ -288,6 +288,6 @@ func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo) (newAPI return openaiErr } - postConsumeQuota(c, info, usage.(*dto.Usage)) + service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil) return nil } diff --git a/relay/image_handler.go b/relay/image_handler.go index a86b980b..481c1cd2 100644 --- a/relay/image_handler.go +++ b/relay/image_handler.go @@ -141,6 +141,6 @@ func ImageHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type logContent = append(logContent, fmt.Sprintf("生成数量 %d", imageN)) } - postConsumeQuota(c, info, usage.(*dto.Usage), logContent...) 
+ service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), logContent) return nil } diff --git a/relay/rerank_handler.go b/relay/rerank_handler.go index 40d686f7..53cd6e47 100644 --- a/relay/rerank_handler.go +++ b/relay/rerank_handler.go @@ -96,6 +96,6 @@ func RerankHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ service.ResetStatusCode(newAPIError, statusCodeMappingStr) return newAPIError } - postConsumeQuota(c, info, usage.(*dto.Usage)) + service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil) return nil } diff --git a/relay/responses_handler.go b/relay/responses_handler.go index 18f1b711..09e490d9 100644 --- a/relay/responses_handler.go +++ b/relay/responses_handler.go @@ -145,7 +145,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError * info.PriceData = originPriceData return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry()) } - postConsumeQuota(c, info, usageDto) + service.PostTextConsumeQuota(c, info, usageDto, nil) info.OriginModelName = originModelName info.PriceData = originPriceData @@ -155,7 +155,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError * if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") { service.PostAudioConsumeQuota(c, info, usageDto, "") } else { - postConsumeQuota(c, info, usageDto) + service.PostTextConsumeQuota(c, info, usageDto, nil) } return nil } diff --git a/service/convert.go b/service/convert.go index 7efaba6c..7b5f3946 100644 --- a/service/convert.go +++ b/service/convert.go @@ -223,6 +223,25 @@ func generateStopBlock(index int) *dto.ClaudeResponse { } } +func buildClaudeUsageFromOpenAIUsage(oaiUsage *dto.Usage) *dto.ClaudeUsage { + if oaiUsage == nil { + return nil + } + usage := &dto.ClaudeUsage{ + InputTokens: oaiUsage.PromptTokens, + OutputTokens: oaiUsage.CompletionTokens, + CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens, + CacheReadInputTokens: 
oaiUsage.PromptTokensDetails.CachedTokens, + } + if oaiUsage.ClaudeCacheCreation5mTokens > 0 || oaiUsage.ClaudeCacheCreation1hTokens > 0 { + usage.CacheCreation = &dto.ClaudeCacheCreationUsage{ + Ephemeral5mInputTokens: oaiUsage.ClaudeCacheCreation5mTokens, + Ephemeral1hInputTokens: oaiUsage.ClaudeCacheCreation1hTokens, + } + } + return usage +} + func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse { if info.ClaudeConvertInfo.Done { return nil @@ -391,13 +410,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon } if oaiUsage != nil { claudeResponses = append(claudeResponses, &dto.ClaudeResponse{ - Type: "message_delta", - Usage: &dto.ClaudeUsage{ - InputTokens: oaiUsage.PromptTokens, - OutputTokens: oaiUsage.CompletionTokens, - CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens, - CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens, - }, + Type: "message_delta", + Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage), Delta: &dto.ClaudeMediaMessage{ StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)), }, @@ -419,13 +433,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon oaiUsage := info.ClaudeConvertInfo.Usage if oaiUsage != nil { claudeResponses = append(claudeResponses, &dto.ClaudeResponse{ - Type: "message_delta", - Usage: &dto.ClaudeUsage{ - InputTokens: oaiUsage.PromptTokens, - OutputTokens: oaiUsage.CompletionTokens, - CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens, - CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens, - }, + Type: "message_delta", + Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage), Delta: &dto.ClaudeMediaMessage{ StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)), }, @@ -555,13 +564,8 @@ func StreamResponseOpenAI2Claude(openAIResponse 
*dto.ChatCompletionsStreamRespon } if oaiUsage != nil { claudeResponses = append(claudeResponses, &dto.ClaudeResponse{ - Type: "message_delta", - Usage: &dto.ClaudeUsage{ - InputTokens: oaiUsage.PromptTokens, - OutputTokens: oaiUsage.CompletionTokens, - CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens, - CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens, - }, + Type: "message_delta", + Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage), Delta: &dto.ClaudeMediaMessage{ StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)), }, diff --git a/service/log_info_generate.go b/service/log_info_generate.go index eea2ea07..373e32d6 100644 --- a/service/log_info_generate.go +++ b/service/log_info_generate.go @@ -73,6 +73,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m other["admin_info"] = adminInfo appendRequestPath(ctx, relayInfo, other) appendRequestConversionChain(relayInfo, other) + appendFinalRequestFormat(relayInfo, other) appendBillingInfo(relayInfo, other) appendParamOverrideInfo(relayInfo, other) return other @@ -167,6 +168,17 @@ func appendRequestConversionChain(relayInfo *relaycommon.RelayInfo, other map[st other["request_conversion"] = chain } +func appendFinalRequestFormat(relayInfo *relaycommon.RelayInfo, other map[string]interface{}) { + if relayInfo == nil || other == nil { + return + } + if relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude { + // claude indicates the final upstream request format is Claude Messages. + // Frontend log rendering uses this to keep the original Claude input display. 
+ other["claude"] = true + } +} + func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice, userGroupRatio float64) map[string]interface{} { info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice, userGroupRatio) info["ws"] = true diff --git a/service/quota.go b/service/quota.go index 7ee70edd..9dc84ab4 100644 --- a/service/quota.go +++ b/service/quota.go @@ -235,108 +235,6 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod }) } -func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) { - if usage != nil { - ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat()) - } - - useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix() - promptTokens := usage.PromptTokens - completionTokens := usage.CompletionTokens - modelName := relayInfo.OriginModelName - - tokenName := ctx.GetString("token_name") - completionRatio := relayInfo.PriceData.CompletionRatio - modelRatio := relayInfo.PriceData.ModelRatio - groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio - modelPrice := relayInfo.PriceData.ModelPrice - cacheRatio := relayInfo.PriceData.CacheRatio - cacheTokens := usage.PromptTokensDetails.CachedTokens - - cacheCreationRatio := relayInfo.PriceData.CacheCreationRatio - cacheCreationRatio5m := relayInfo.PriceData.CacheCreation5mRatio - cacheCreationRatio1h := relayInfo.PriceData.CacheCreation1hRatio - cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens - cacheCreationTokens5m := usage.ClaudeCacheCreation5mTokens - cacheCreationTokens1h := usage.ClaudeCacheCreation1hTokens - - if relayInfo.ChannelType == constant.ChannelTypeOpenRouter { - promptTokens -= cacheTokens - isUsingCustomSettings := relayInfo.PriceData.UsePrice || 
hasCustomModelRatio(modelName, relayInfo.PriceData.ModelRatio) - if cacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings { - maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData) - if maybeCacheCreationTokens >= 0 && promptTokens >= maybeCacheCreationTokens { - cacheCreationTokens = maybeCacheCreationTokens - } - } - promptTokens -= cacheCreationTokens - } - - calculateQuota := 0.0 - if !relayInfo.PriceData.UsePrice { - calculateQuota = float64(promptTokens) - calculateQuota += float64(cacheTokens) * cacheRatio - calculateQuota += float64(cacheCreationTokens5m) * cacheCreationRatio5m - calculateQuota += float64(cacheCreationTokens1h) * cacheCreationRatio1h - remainingCacheCreationTokens := cacheCreationTokens - cacheCreationTokens5m - cacheCreationTokens1h - if remainingCacheCreationTokens > 0 { - calculateQuota += float64(remainingCacheCreationTokens) * cacheCreationRatio - } - calculateQuota += float64(completionTokens) * completionRatio - calculateQuota = calculateQuota * groupRatio * modelRatio - } else { - calculateQuota = modelPrice * common.QuotaPerUnit * groupRatio - } - - if modelRatio != 0 && calculateQuota <= 0 { - calculateQuota = 1 - } - - quota := int(calculateQuota) - - totalTokens := promptTokens + completionTokens - - var logContent string - // record all the consume log even if quota is 0 - if totalTokens == 0 { - // in this case, must be some error happened - // we cannot just return, because we may have to return the pre-consumed quota - quota = 0 - logContent += fmt.Sprintf("(可能是上游出错)") - logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+ - "tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota)) - } else { - model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota) - 
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota) - } - - if err := SettleBilling(ctx, relayInfo, quota); err != nil { - logger.LogError(ctx, "error settling billing: "+err.Error()) - } - - other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, - cacheTokens, cacheRatio, - cacheCreationTokens, cacheCreationRatio, - cacheCreationTokens5m, cacheCreationRatio5m, - cacheCreationTokens1h, cacheCreationRatio1h, - modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio) - model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{ - ChannelId: relayInfo.ChannelId, - PromptTokens: promptTokens, - CompletionTokens: completionTokens, - ModelName: modelName, - TokenName: tokenName, - Quota: quota, - Content: logContent, - TokenId: relayInfo.TokenId, - UseTimeSeconds: int(useTimeSeconds), - IsStream: relayInfo.IsStream, - Group: relayInfo.UsingGroup, - Other: other, - }) - -} - func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData types.PriceData) int { if priceData.CacheCreationRatio == 1 { return 0 diff --git a/service/text_quota.go b/service/text_quota.go new file mode 100644 index 00000000..a300097e --- /dev/null +++ b/service/text_quota.go @@ -0,0 +1,427 @@ +package service + +import ( + "fmt" + "strings" + "time" + + "github.com/QuantumNous/new-api/common" + "github.com/QuantumNous/new-api/constant" + "github.com/QuantumNous/new-api/dto" + "github.com/QuantumNous/new-api/logger" + "github.com/QuantumNous/new-api/model" + relaycommon "github.com/QuantumNous/new-api/relay/common" + "github.com/QuantumNous/new-api/setting/operation_setting" + "github.com/QuantumNous/new-api/types" + + "github.com/gin-gonic/gin" + "github.com/shopspring/decimal" +) + +type textQuotaSummary struct { + PromptTokens int + CompletionTokens int + TotalTokens int + CacheTokens int + CacheCreationTokens int + CacheCreationTokens5m int + CacheCreationTokens1h int + ImageTokens int + AudioTokens int + ModelName string + 
TokenName string + UseTimeSeconds int64 + CompletionRatio float64 + CacheRatio float64 + ImageRatio float64 + ModelRatio float64 + GroupRatio float64 + ModelPrice float64 + CacheCreationRatio float64 + CacheCreationRatio5m float64 + CacheCreationRatio1h float64 + Quota int + IsClaudeUsageSemantic bool + UsageSemantic string + WebSearchPrice float64 + WebSearchCallCount int + ClaudeWebSearchPrice float64 + ClaudeWebSearchCallCount int + FileSearchPrice float64 + FileSearchCallCount int + AudioInputPrice float64 + ImageGenerationCallPrice float64 +} + +func cacheWriteTokensTotal(summary textQuotaSummary) int { + if summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0 { + splitCacheWriteTokens := summary.CacheCreationTokens5m + summary.CacheCreationTokens1h + if summary.CacheCreationTokens > splitCacheWriteTokens { + return summary.CacheCreationTokens + } + return splitCacheWriteTokens + } + return summary.CacheCreationTokens +} + +func isLegacyClaudeDerivedOpenAIUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) bool { + if relayInfo == nil || usage == nil { + return false + } + if relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude { + return false + } + if usage.UsageSource != "" || usage.UsageSemantic != "" { + return false + } + return usage.ClaudeCacheCreation5mTokens > 0 || usage.ClaudeCacheCreation1hTokens > 0 +} + +func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) textQuotaSummary { + summary := textQuotaSummary{ + ModelName: relayInfo.OriginModelName, + TokenName: ctx.GetString("token_name"), + UseTimeSeconds: time.Now().Unix() - relayInfo.StartTime.Unix(), + CompletionRatio: relayInfo.PriceData.CompletionRatio, + CacheRatio: relayInfo.PriceData.CacheRatio, + ImageRatio: relayInfo.PriceData.ImageRatio, + ModelRatio: relayInfo.PriceData.ModelRatio, + GroupRatio: relayInfo.PriceData.GroupRatioInfo.GroupRatio, + ModelPrice: relayInfo.PriceData.ModelPrice, + 
CacheCreationRatio: relayInfo.PriceData.CacheCreationRatio, + CacheCreationRatio5m: relayInfo.PriceData.CacheCreation5mRatio, + CacheCreationRatio1h: relayInfo.PriceData.CacheCreation1hRatio, + UsageSemantic: usageSemanticFromUsage(relayInfo, usage), + } + summary.IsClaudeUsageSemantic = summary.UsageSemantic == "anthropic" + + if usage == nil { + usage = &dto.Usage{ + PromptTokens: relayInfo.GetEstimatePromptTokens(), + CompletionTokens: 0, + TotalTokens: relayInfo.GetEstimatePromptTokens(), + } + } + + summary.PromptTokens = usage.PromptTokens + summary.CompletionTokens = usage.CompletionTokens + summary.TotalTokens = usage.PromptTokens + usage.CompletionTokens + summary.CacheTokens = usage.PromptTokensDetails.CachedTokens + summary.CacheCreationTokens = usage.PromptTokensDetails.CachedCreationTokens + summary.CacheCreationTokens5m = usage.ClaudeCacheCreation5mTokens + summary.CacheCreationTokens1h = usage.ClaudeCacheCreation1hTokens + summary.ImageTokens = usage.PromptTokensDetails.ImageTokens + summary.AudioTokens = usage.PromptTokensDetails.AudioTokens + legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage) + + if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter { + summary.PromptTokens -= summary.CacheTokens + isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio) + if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings { + maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData) + if maybeCacheCreationTokens >= 0 && summary.PromptTokens >= maybeCacheCreationTokens { + summary.CacheCreationTokens = maybeCacheCreationTokens + } + } + summary.PromptTokens -= summary.CacheCreationTokens + } + + dPromptTokens := decimal.NewFromInt(int64(summary.PromptTokens)) + dCacheTokens := decimal.NewFromInt(int64(summary.CacheTokens)) + 
dImageTokens := decimal.NewFromInt(int64(summary.ImageTokens)) + dAudioTokens := decimal.NewFromInt(int64(summary.AudioTokens)) + dCompletionTokens := decimal.NewFromInt(int64(summary.CompletionTokens)) + dCachedCreationTokens := decimal.NewFromInt(int64(summary.CacheCreationTokens)) + dCompletionRatio := decimal.NewFromFloat(summary.CompletionRatio) + dCacheRatio := decimal.NewFromFloat(summary.CacheRatio) + dImageRatio := decimal.NewFromFloat(summary.ImageRatio) + dModelRatio := decimal.NewFromFloat(summary.ModelRatio) + dGroupRatio := decimal.NewFromFloat(summary.GroupRatio) + dModelPrice := decimal.NewFromFloat(summary.ModelPrice) + dCacheCreationRatio := decimal.NewFromFloat(summary.CacheCreationRatio) + dCacheCreationRatio5m := decimal.NewFromFloat(summary.CacheCreationRatio5m) + dCacheCreationRatio1h := decimal.NewFromFloat(summary.CacheCreationRatio1h) + dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit) + + ratio := dModelRatio.Mul(dGroupRatio) + + var dWebSearchQuota decimal.Decimal + if relayInfo.ResponsesUsageInfo != nil { + if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 { + summary.WebSearchCallCount = webSearchTool.CallCount + summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, webSearchTool.SearchContextSize) + dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice). + Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))). 
+ Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit) + } + } else if strings.HasSuffix(summary.ModelName, "search-preview") { + searchContextSize := ctx.GetString("chat_completion_web_search_context_size") + if searchContextSize == "" { + searchContextSize = "medium" + } + summary.WebSearchCallCount = 1 + summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, searchContextSize) + dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice). + Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit) + } + + var dClaudeWebSearchQuota decimal.Decimal + summary.ClaudeWebSearchCallCount = ctx.GetInt("claude_web_search_requests") + if summary.ClaudeWebSearchCallCount > 0 { + summary.ClaudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand() + dClaudeWebSearchQuota = decimal.NewFromFloat(summary.ClaudeWebSearchPrice). + Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit). + Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount))) + } + + var dFileSearchQuota decimal.Decimal + if relayInfo.ResponsesUsageInfo != nil { + if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 { + summary.FileSearchCallCount = fileSearchTool.CallCount + summary.FileSearchPrice = operation_setting.GetFileSearchPricePerThousand() + dFileSearchQuota = decimal.NewFromFloat(summary.FileSearchPrice). + Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))). 
+ Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit) + } + } + + var dImageGenerationCallQuota decimal.Decimal + if ctx.GetBool("image_generation_call") { + summary.ImageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size")) + dImageGenerationCallQuota = decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit) + } + + var audioInputQuota decimal.Decimal + if !relayInfo.PriceData.UsePrice { + baseTokens := dPromptTokens + + var cachedTokensWithRatio decimal.Decimal + if !dCacheTokens.IsZero() { + if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived { + baseTokens = baseTokens.Sub(dCacheTokens) + } + cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio) + } + + var cachedCreationTokensWithRatio decimal.Decimal + hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0 + if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens { + if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived { + baseTokens = baseTokens.Sub(dCachedCreationTokens) + cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio) + } else { + remaining := summary.CacheCreationTokens - summary.CacheCreationTokens5m - summary.CacheCreationTokens1h + if remaining < 0 { + remaining = 0 + } + cachedCreationTokensWithRatio = decimal.NewFromInt(int64(remaining)).Mul(dCacheCreationRatio) + cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens5m)).Mul(dCacheCreationRatio5m)) + cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens1h)).Mul(dCacheCreationRatio1h)) + } + } + + var imageTokensWithRatio decimal.Decimal + if !dImageTokens.IsZero() { + baseTokens = baseTokens.Sub(dImageTokens) + imageTokensWithRatio = dImageTokens.Mul(dImageRatio) + } + + if 
!dAudioTokens.IsZero() { + summary.AudioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(summary.ModelName) + if summary.AudioInputPrice > 0 { + baseTokens = baseTokens.Sub(dAudioTokens) + audioInputQuota = decimal.NewFromFloat(summary.AudioInputPrice). + Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit) + } + } + + promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio).Add(cachedCreationTokensWithRatio) + completionQuota := dCompletionTokens.Mul(dCompletionRatio) + quotaCalculateDecimal := promptQuota.Add(completionQuota).Mul(ratio) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota) + + if len(relayInfo.PriceData.OtherRatios) > 0 { + for _, otherRatio := range relayInfo.PriceData.OtherRatios { + quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio)) + } + } + + if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) { + quotaCalculateDecimal = decimal.NewFromInt(1) + } + summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart()) + } else { + quotaCalculateDecimal := dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota) + quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota) + if len(relayInfo.PriceData.OtherRatios) > 0 { + for _, otherRatio := range relayInfo.PriceData.OtherRatios { + quotaCalculateDecimal = 
quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio)) + } + } + summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart()) + } + + if summary.TotalTokens == 0 { + summary.Quota = 0 + } else if !ratio.IsZero() && summary.Quota == 0 { + summary.Quota = 1 + } + + return summary +} + +func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) string { + if usage != nil && usage.UsageSemantic != "" { + return usage.UsageSemantic + } + if relayInfo != nil && relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude { + return "anthropic" + } + return "openai" +} + +func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent []string) { + originUsage := usage + if usage == nil { + extraContent = append(extraContent, "上游无计费信息") + } + if originUsage != nil { + ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat()) + } + + adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason) + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + if summary.WebSearchCallCount > 0 { + extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次,调用花费 %s", summary.WebSearchCallCount, decimal.NewFromFloat(summary.WebSearchPrice).Mul(decimal.NewFromInt(int64(summary.WebSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String())) + } + if summary.ClaudeWebSearchCallCount > 0 { + extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次,调用花费 %s", summary.ClaudeWebSearchCallCount, decimal.NewFromFloat(summary.ClaudeWebSearchPrice).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount))).String())) + } + if summary.FileSearchCallCount > 0 { + extraContent = append(extraContent, 
fmt.Sprintf("File Search 调用 %d 次,调用花费 %s", summary.FileSearchCallCount, decimal.NewFromFloat(summary.FileSearchPrice).Mul(decimal.NewFromInt(int64(summary.FileSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String())) + } + if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 { + extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", decimal.NewFromFloat(summary.AudioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(decimal.NewFromInt(int64(summary.AudioTokens))).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String())) + } + if summary.ImageGenerationCallPrice > 0 { + extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String())) + } + + if summary.TotalTokens == 0 { + extraContent = append(extraContent, "上游没有返回计费信息,无法扣费(可能是上游超时)") + logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, summary.ModelName, relayInfo.FinalPreConsumedQuota)) + } else { + model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, summary.Quota) + model.UpdateChannelUsedQuota(relayInfo.ChannelId, summary.Quota) + } + + if err := SettleBilling(ctx, relayInfo, summary.Quota); err != nil { + logger.LogError(ctx, "error settling billing: "+err.Error()) + } + + logModel := summary.ModelName + if strings.HasPrefix(logModel, "gpt-4-gizmo") { + logModel = "gpt-4-gizmo-*" + extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName)) + } + if strings.HasPrefix(logModel, "gpt-4o-gizmo") { + logModel = "gpt-4o-gizmo-*" + extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName)) + } + + 
logContent := strings.Join(extraContent, ", ") + var other map[string]interface{} + if summary.IsClaudeUsageSemantic { + other = GenerateClaudeOtherInfo(ctx, relayInfo, + summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio, + summary.CacheTokens, summary.CacheRatio, + summary.CacheCreationTokens, summary.CacheCreationRatio, + summary.CacheCreationTokens5m, summary.CacheCreationRatio5m, + summary.CacheCreationTokens1h, summary.CacheCreationRatio1h, + summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio) + other["usage_semantic"] = "anthropic" + } else { + other = GenerateTextOtherInfo(ctx, relayInfo, summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio, summary.CacheTokens, summary.CacheRatio, summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio) + } + if adminRejectReason != "" { + other["reject_reason"] = adminRejectReason + } + if summary.ImageTokens != 0 { + other["image"] = true + other["image_ratio"] = summary.ImageRatio + other["image_output"] = summary.ImageTokens + } + if summary.WebSearchCallCount > 0 { + other["web_search"] = true + other["web_search_call_count"] = summary.WebSearchCallCount + other["web_search_price"] = summary.WebSearchPrice + } else if summary.ClaudeWebSearchCallCount > 0 { + other["web_search"] = true + other["web_search_call_count"] = summary.ClaudeWebSearchCallCount + other["web_search_price"] = summary.ClaudeWebSearchPrice + } + if summary.FileSearchCallCount > 0 { + other["file_search"] = true + other["file_search_call_count"] = summary.FileSearchCallCount + other["file_search_price"] = summary.FileSearchPrice + } + if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 { + other["audio_input_seperate_price"] = true + other["audio_input_token_count"] = summary.AudioTokens + other["audio_input_price"] = summary.AudioInputPrice + } + if summary.ImageGenerationCallPrice > 0 { + other["image_generation_call"] = true + other["image_generation_call_price"] = 
summary.ImageGenerationCallPrice + } + if summary.CacheCreationTokens > 0 { + other["cache_creation_tokens"] = summary.CacheCreationTokens + other["cache_creation_ratio"] = summary.CacheCreationRatio + } + if summary.CacheCreationTokens5m > 0 { + other["cache_creation_tokens_5m"] = summary.CacheCreationTokens5m + other["cache_creation_ratio_5m"] = summary.CacheCreationRatio5m + } + if summary.CacheCreationTokens1h > 0 { + other["cache_creation_tokens_1h"] = summary.CacheCreationTokens1h + other["cache_creation_ratio_1h"] = summary.CacheCreationRatio1h + } + cacheWriteTokens := cacheWriteTokensTotal(summary) + if cacheWriteTokens > 0 { + // cache_write_tokens: normalized cache creation total for UI display. + // If split 5m/1h values are present, this is the larger of their sum and + // cache_creation_tokens; otherwise it falls back to cache_creation_tokens. + other["cache_write_tokens"] = cacheWriteTokens + } + if relayInfo.GetFinalRequestRelayFormat() != types.RelayFormatClaude && usage != nil && usage.UsageSource != "" && usage.InputTokens > 0 { + // input_tokens_total: explicit normalized total input used by the usage log UI. + // Only write this field when upstream/current conversion has already provided a + // reliable total input value and tagged the usage source. Do not infer it from + // prompt/cache fields here, otherwise old upstream payloads may be double-counted.
+ other["input_tokens_total"] = usage.InputTokens + } + + model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{ + ChannelId: relayInfo.ChannelId, + PromptTokens: summary.PromptTokens, + CompletionTokens: summary.CompletionTokens, + ModelName: logModel, + TokenName: summary.TokenName, + Quota: summary.Quota, + Content: logContent, + TokenId: relayInfo.TokenId, + UseTimeSeconds: int(summary.UseTimeSeconds), + IsStream: relayInfo.IsStream, + Group: relayInfo.UsingGroup, + Other: other, + }) +} diff --git a/service/text_quota_test.go b/service/text_quota_test.go new file mode 100644 index 00000000..4370b16e --- /dev/null +++ b/service/text_quota_test.go @@ -0,0 +1,206 @@ +package service + +import ( + "net/http/httptest" + "testing" + "time" + + "github.com/QuantumNous/new-api/dto" + relaycommon "github.com/QuantumNous/new-api/relay/common" + "github.com/QuantumNous/new-api/types" + + "github.com/gin-gonic/gin" + "github.com/stretchr/testify/require" +) + +func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + usage := &dto.Usage{ + PromptTokens: 1000, + CompletionTokens: 200, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 100, + CachedCreationTokens: 50, + }, + ClaudeCacheCreation5mTokens: 10, + ClaudeCacheCreation1hTokens: 20, + } + + priceData := types.PriceData{ + ModelRatio: 1, + CompletionRatio: 2, + CacheRatio: 0.1, + CacheCreationRatio: 1.25, + CacheCreation5mRatio: 1.25, + CacheCreation1hRatio: 2, + GroupRatioInfo: types.GroupRatioInfo{ + GroupRatio: 1, + }, + } + + chatRelayInfo := &relaycommon.RelayInfo{ + RelayFormat: types.RelayFormatOpenAI, + FinalRequestRelayFormat: types.RelayFormatClaude, + OriginModelName: "claude-3-7-sonnet", + PriceData: priceData, + StartTime: time.Now(), + } + messageRelayInfo := &relaycommon.RelayInfo{ + RelayFormat: types.RelayFormatClaude, + FinalRequestRelayFormat: 
types.RelayFormatClaude, + OriginModelName: "claude-3-7-sonnet", + PriceData: priceData, + StartTime: time.Now(), + } + + chatSummary := calculateTextQuotaSummary(ctx, chatRelayInfo, usage) + messageSummary := calculateTextQuotaSummary(ctx, messageRelayInfo, usage) + + require.Equal(t, messageSummary.Quota, chatSummary.Quota) + require.Equal(t, messageSummary.CacheCreationTokens5m, chatSummary.CacheCreationTokens5m) + require.Equal(t, messageSummary.CacheCreationTokens1h, chatSummary.CacheCreationTokens1h) + require.True(t, chatSummary.IsClaudeUsageSemantic) + require.Equal(t, 1488, chatSummary.Quota) +} + +func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + RelayFormat: types.RelayFormatOpenAI, + FinalRequestRelayFormat: types.RelayFormatClaude, + OriginModelName: "claude-3-7-sonnet", + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 1, + CacheRatio: 0, + CacheCreationRatio: 1, + CacheCreation5mRatio: 2, + CacheCreation1hRatio: 3, + GroupRatioInfo: types.GroupRatioInfo{ + GroupRatio: 1, + }, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 100, + CompletionTokens: 0, + PromptTokensDetails: dto.InputTokenDetails{ + CachedCreationTokens: 10, + }, + ClaudeCacheCreation5mTokens: 2, + ClaudeCacheCreation1hTokens: 3, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + // 100 + remaining(5)*1 + 2*2 + 3*3 = 118 + require.Equal(t, 118, summary.Quota) +} + +func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + RelayFormat: types.RelayFormatOpenAI, + OriginModelName: "claude-3-7-sonnet", + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 2, + CacheRatio: 
0.1, + CacheCreationRatio: 1.25, + CacheCreation5mRatio: 1.25, + CacheCreation1hRatio: 2, + GroupRatioInfo: types.GroupRatioInfo{ + GroupRatio: 1, + }, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 1000, + CompletionTokens: 200, + UsageSemantic: "anthropic", + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 100, + CachedCreationTokens: 50, + }, + ClaudeCacheCreation5mTokens: 10, + ClaudeCacheCreation1hTokens: 20, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + require.True(t, summary.IsClaudeUsageSemantic) + require.Equal(t, "anthropic", summary.UsageSemantic) + require.Equal(t, 1488, summary.Quota) +} + +func TestCacheWriteTokensTotal(t *testing.T) { + t.Run("split cache creation", func(t *testing.T) { + summary := textQuotaSummary{ + CacheCreationTokens: 50, + CacheCreationTokens5m: 10, + CacheCreationTokens1h: 20, + } + require.Equal(t, 50, cacheWriteTokensTotal(summary)) + }) + + t.Run("legacy cache creation", func(t *testing.T) { + summary := textQuotaSummary{CacheCreationTokens: 50} + require.Equal(t, 50, cacheWriteTokensTotal(summary)) + }) + + t.Run("split cache creation without aggregate remainder", func(t *testing.T) { + summary := textQuotaSummary{ + CacheCreationTokens5m: 10, + CacheCreationTokens1h: 20, + } + require.Equal(t, 30, cacheWriteTokensTotal(summary)) + }) +} + +func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) { + gin.SetMode(gin.TestMode) + w := httptest.NewRecorder() + ctx, _ := gin.CreateTestContext(w) + + relayInfo := &relaycommon.RelayInfo{ + RelayFormat: types.RelayFormatOpenAI, + OriginModelName: "claude-3-7-sonnet", + PriceData: types.PriceData{ + ModelRatio: 1, + CompletionRatio: 5, + CacheRatio: 0.1, + CacheCreationRatio: 1.25, + CacheCreation5mRatio: 1.25, + CacheCreation1hRatio: 2, + GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1}, + }, + StartTime: time.Now(), + } + + usage := &dto.Usage{ + PromptTokens: 62, + 
CompletionTokens: 95, + PromptTokensDetails: dto.InputTokenDetails{ + CachedTokens: 3544, + }, + ClaudeCacheCreation5mTokens: 586, + } + + summary := calculateTextQuotaSummary(ctx, relayInfo, usage) + + // 62 + 3544*0.1 + 586*1.25 + 95*5 = 1623.9 => rounds to 1624 + require.Equal(t, 1624, summary.Quota) +}