Merge pull request #3400 from seefs001/fix/openai-usage

refactor: optimize billing flow for OpenAI-to-Anthropic convert
This commit is contained in:
Calcium-Ion
2026-03-23 15:03:57 +08:00
committed by GitHub
16 changed files with 809 additions and 433 deletions

View File

@@ -220,10 +220,12 @@ type CompletionsStreamResponse struct {
}
type Usage struct {
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
PromptTokens int `json:"prompt_tokens"`
CompletionTokens int `json:"completion_tokens"`
TotalTokens int `json:"total_tokens"`
PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
UsageSemantic string `json:"usage_semantic,omitempty"`
UsageSource string `json:"usage_source,omitempty"`
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
@@ -251,7 +253,7 @@ type OpenAIVideoResponse struct {
type InputTokenDetails struct {
CachedTokens int `json:"cached_tokens"`
CachedCreationTokens int `json:"-"`
CachedCreationTokens int `json:"cached_creation_tokens,omitempty"`
TextTokens int `json:"text_tokens"`
AudioTokens int `json:"audio_tokens"`
ImageTokens int `json:"image_tokens"`

View File

@@ -70,7 +70,7 @@ func AudioHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
if usage.(*dto.Usage).CompletionTokenDetails.AudioTokens > 0 || usage.(*dto.Usage).PromptTokensDetails.AudioTokens > 0 {
service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
} else {
postConsumeQuota(c, info, usage.(*dto.Usage))
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
}
return nil

View File

@@ -555,6 +555,35 @@ type ClaudeResponseInfo struct {
Done bool
}
// cacheCreationTokensForOpenAIUsage picks the cache-creation token count that
// should be folded into an OpenAI-style prompt total.
//
// It compares the aggregate counter (PromptTokensDetails.CachedCreationTokens)
// with the sum of the 5m and 1h split counters. The aggregate is returned when
// no split is reported or when the aggregate is larger than the split sum;
// otherwise the split sum is returned. A nil usage yields 0.
func cacheCreationTokensForOpenAIUsage(usage *dto.Usage) int {
	if usage == nil {
		return 0
	}
	aggregate := usage.PromptTokensDetails.CachedCreationTokens
	splitSum := usage.ClaudeCacheCreation5mTokens + usage.ClaudeCacheCreation1hTokens
	// The aggregate wins both when the split is absent and when it carries a
	// remainder that the split counters do not account for.
	if splitSum == 0 || aggregate > splitSum {
		return aggregate
	}
	return splitSum
}
// buildOpenAIStyleUsageFromClaudeUsage returns a copy of usage whose prompt
// counters follow the OpenAI convention.
//
// The new prompt total is the sum of usage.PromptTokens, the cached-read tokens
// and the cache-creation tokens chosen by cacheCreationTokensForOpenAIUsage.
// PromptTokens and InputTokens are both set to that total, TotalTokens to the
// total plus CompletionTokens, and the copy is tagged UsageSemantic "openai" /
// UsageSource "anthropic". The input is not modified; a nil input yields a zero
// dto.Usage.
func buildOpenAIStyleUsageFromClaudeUsage(usage *dto.Usage) dto.Usage {
	if usage == nil {
		return dto.Usage{}
	}

	inputTotal := usage.PromptTokens +
		usage.PromptTokensDetails.CachedTokens +
		cacheCreationTokensForOpenAIUsage(usage)

	converted := *usage
	converted.PromptTokens = inputTotal
	converted.InputTokens = inputTotal
	converted.TotalTokens = inputTotal + usage.CompletionTokens
	converted.UsageSemantic = "openai"
	converted.UsageSource = "anthropic"
	return converted
}
func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo *ClaudeResponseInfo) *dto.ClaudeUsage {
usage := &dto.ClaudeUsage{}
if claudeResponse != nil && claudeResponse.Usage != nil {
@@ -643,6 +672,7 @@ func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *d
// message_start, 获取usage
if claudeResponse.Message != nil && claudeResponse.Message.Usage != nil {
claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
claudeInfo.Usage.UsageSemantic = "anthropic"
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens()
@@ -661,6 +691,7 @@ func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *d
} else if claudeResponse.Type == "message_delta" {
// 最终的usage获取
if claudeResponse.Usage != nil {
claudeInfo.Usage.UsageSemantic = "anthropic"
if claudeResponse.Usage.InputTokens > 0 {
// 不叠加,只取最新的
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
@@ -754,12 +785,16 @@ func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, clau
}
claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
}
if claudeInfo.Usage != nil {
claudeInfo.Usage.UsageSemantic = "anthropic"
}
if info.RelayFormat == types.RelayFormatClaude {
//
} else if info.RelayFormat == types.RelayFormatOpenAI {
if info.ShouldIncludeUsage {
response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, claudeInfo.Created, info.UpstreamModelName, *claudeInfo.Usage)
openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(claudeInfo.Usage)
response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, claudeInfo.Created, info.UpstreamModelName, openAIUsage)
err := helper.ObjectData(c, response)
if err != nil {
common.SysLog("send final response failed: " + err.Error())
@@ -810,6 +845,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
claudeInfo.Usage.UsageSemantic = "anthropic"
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens()
@@ -819,7 +855,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
switch info.RelayFormat {
case types.RelayFormatOpenAI:
openaiResponse := ResponseClaude2OpenAI(&claudeResponse)
openaiResponse.Usage = *claudeInfo.Usage
openaiResponse.Usage = buildOpenAIStyleUsageFromClaudeUsage(claudeInfo.Usage)
responseData, err = json.Marshal(openaiResponse)
if err != nil {
return types.NewError(err, types.ErrorCodeBadResponseBody)

View File

@@ -173,3 +173,85 @@ func TestFormatClaudeResponseInfo_ContentBlockDelta(t *testing.T) {
t.Errorf("ResponseText = %q, want %q", claudeInfo.ResponseText.String(), "hello")
}
}
// TestBuildOpenAIStyleUsageFromClaudeUsage checks that the converted usage sums
// the prompt, cached-read and cache-creation tokens and carries the
// "openai"/"anthropic" semantic and source tags.
func TestBuildOpenAIStyleUsageFromClaudeUsage(t *testing.T) {
	claudeUsage := &dto.Usage{
		PromptTokens:     100,
		CompletionTokens: 20,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens:         30,
			CachedCreationTokens: 50,
		},
		ClaudeCacheCreation5mTokens: 10,
		ClaudeCacheCreation1hTokens: 20,
		UsageSemantic:               "anthropic",
	}

	got := buildOpenAIStyleUsageFromClaudeUsage(claudeUsage)

	// 100 prompt + 30 cached + 50 cache creation = 180; plus 20 completion = 200.
	// Cases are evaluated top to bottom, so the first mismatch is the one reported.
	switch {
	case got.PromptTokens != 180:
		t.Fatalf("PromptTokens = %d, want 180", got.PromptTokens)
	case got.InputTokens != 180:
		t.Fatalf("InputTokens = %d, want 180", got.InputTokens)
	case got.TotalTokens != 200:
		t.Fatalf("TotalTokens = %d, want 200", got.TotalTokens)
	case got.UsageSemantic != "openai":
		t.Fatalf("UsageSemantic = %s, want openai", got.UsageSemantic)
	case got.UsageSource != "anthropic":
		t.Fatalf("UsageSource = %s, want anthropic", got.UsageSource)
	}
}
// TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder
// covers the choice between the aggregate cache-creation counter and the 5m/1h
// split counters when building the OpenAI-style prompt total.
func TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder(t *testing.T) {
	cases := []struct {
		name      string
		aggregate int
		split5m   int
		split1h   int
		wantInput int
	}{
		{
			// Aggregate (50) exceeds the split sum (30): 100 + 30 + 50.
			name:      "prefers aggregate when it includes remainder",
			aggregate: 50,
			split5m:   10,
			split1h:   20,
			wantInput: 180,
		},
		{
			// No aggregate reported: 100 + 30 + (10 + 20).
			name:      "falls back to split tokens when aggregate missing",
			aggregate: 0,
			split5m:   10,
			split1h:   20,
			wantInput: 160,
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			claudeUsage := &dto.Usage{
				PromptTokens:     100,
				CompletionTokens: 20,
				PromptTokensDetails: dto.InputTokenDetails{
					CachedTokens:         30,
					CachedCreationTokens: tc.aggregate,
				},
				ClaudeCacheCreation5mTokens: tc.split5m,
				ClaudeCacheCreation1hTokens: tc.split1h,
				UsageSemantic:               "anthropic",
			}

			got := buildOpenAIStyleUsageFromClaudeUsage(claudeUsage)

			switch {
			case got.PromptTokens != tc.wantInput:
				t.Fatalf("PromptTokens = %d, want %d", got.PromptTokens, tc.wantInput)
			case got.InputTokens != tc.wantInput:
				t.Fatalf("InputTokens = %d, want %d", got.InputTokens, tc.wantInput)
			}
		})
	}
}

View File

@@ -122,7 +122,7 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
return newApiErr
}
service.PostClaudeConsumeQuota(c, info, usage)
service.PostTextConsumeQuota(c, info, usage, nil)
return nil
}
@@ -190,6 +190,6 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
return newAPIError
}
service.PostClaudeConsumeQuota(c, info, usage.(*dto.Usage))
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
return nil
}

View File

@@ -6,25 +6,20 @@ import (
"io"
"net/http"
"strings"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/model"
relaycommon "github.com/QuantumNous/new-api/relay/common"
relayconstant "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/relay/helper"
"github.com/QuantumNous/new-api/service"
"github.com/QuantumNous/new-api/setting/model_setting"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/setting/ratio_setting"
"github.com/QuantumNous/new-api/types"
"github.com/samber/lo"
"github.com/shopspring/decimal"
"github.com/gin-gonic/gin"
)
@@ -93,7 +88,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
if containAudioTokens && containsAudioRatios {
service.PostAudioConsumeQuota(c, info, usage, "")
} else {
postConsumeQuota(c, info, usage)
service.PostTextConsumeQuota(c, info, usage, nil)
}
return nil
}
@@ -216,293 +211,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
if containAudioTokens && containsAudioRatios {
service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
} else {
postConsumeQuota(c, info, usage.(*dto.Usage))
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
}
return nil
}
// postConsumeQuota computes the quota owed for a text-style relay request,
// settles billing through service.SettleBilling, and records a consume log.
//
// Parameters:
//   - ctx: gin context; read for "token_name", the web-search / image-generation
//     flags and the admin reject reason.
//   - relayInfo: supplies pricing (PriceData), model/channel/user/token
//     identifiers, the request start time and the final request relay format.
//   - usage: upstream usage. When nil, a usage built from
//     relayInfo.GetEstimatePromptTokens() is billed and a note is appended to the
//     log content.
//   - extraContent: optional strings joined (with ", ") into the log content.
func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent ...string) {
// Remember whether the caller actually supplied usage before it may be replaced
// by the estimate below.
originUsage := usage
if usage == nil {
usage = &dto.Usage{
PromptTokens: relayInfo.GetEstimatePromptTokens(),
CompletionTokens: 0,
TotalTokens: relayInfo.GetEstimatePromptTokens(),
}
extraContent = append(extraContent, "上游无计费信息")
}
// Only real upstream usage is reported to the channel-affinity usage cache.
if originUsage != nil {
service.ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
}
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
promptTokens := usage.PromptTokens
cacheTokens := usage.PromptTokensDetails.CachedTokens
imageTokens := usage.PromptTokensDetails.ImageTokens
audioTokens := usage.PromptTokensDetails.AudioTokens
completionTokens := usage.CompletionTokens
cachedCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
modelName := relayInfo.OriginModelName
tokenName := ctx.GetString("token_name")
completionRatio := relayInfo.PriceData.CompletionRatio
cacheRatio := relayInfo.PriceData.CacheRatio
imageRatio := relayInfo.PriceData.ImageRatio
modelRatio := relayInfo.PriceData.ModelRatio
groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio
modelPrice := relayInfo.PriceData.ModelPrice
cachedCreationRatio := relayInfo.PriceData.CacheCreationRatio
// Convert values to decimal for precise calculation
dPromptTokens := decimal.NewFromInt(int64(promptTokens))
dCacheTokens := decimal.NewFromInt(int64(cacheTokens))
dImageTokens := decimal.NewFromInt(int64(imageTokens))
dAudioTokens := decimal.NewFromInt(int64(audioTokens))
dCompletionTokens := decimal.NewFromInt(int64(completionTokens))
dCachedCreationTokens := decimal.NewFromInt(int64(cachedCreationTokens))
dCompletionRatio := decimal.NewFromFloat(completionRatio)
dCacheRatio := decimal.NewFromFloat(cacheRatio)
dImageRatio := decimal.NewFromFloat(imageRatio)
dModelRatio := decimal.NewFromFloat(modelRatio)
dGroupRatio := decimal.NewFromFloat(groupRatio)
dModelPrice := decimal.NewFromFloat(modelPrice)
dCachedCreationRatio := decimal.NewFromFloat(cachedCreationRatio)
dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
// Combined multiplier applied to the token-based quota.
ratio := dModelRatio.Mul(dGroupRatio)
// OpenAI web search tool billing
var dWebSearchQuota decimal.Decimal
var webSearchPrice float64
// Tool billing for the Responses API format
if relayInfo.ResponsesUsageInfo != nil {
if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 {
// Compute the web search quota (quota = price * call count / 1000 * group ratio)
webSearchPrice = operation_setting.GetWebSearchPricePerThousand(modelName, webSearchTool.SearchContextSize)
dWebSearchQuota = decimal.NewFromFloat(webSearchPrice).
Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次,上下文大小 %s调用花费 %s",
webSearchTool.CallCount, webSearchTool.SearchContextSize, dWebSearchQuota.String()))
}
} else if strings.HasSuffix(modelName, "search-preview") {
// search-preview models do not support the Responses API; bill one call,
// defaulting the context size to "medium" when the context does not carry one.
searchContextSize := ctx.GetString("chat_completion_web_search_context_size")
if searchContextSize == "" {
searchContextSize = "medium"
}
webSearchPrice = operation_setting.GetWebSearchPricePerThousand(modelName, searchContextSize)
dWebSearchQuota = decimal.NewFromFloat(webSearchPrice).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 1 次,上下文大小 %s调用花费 %s",
searchContextSize, dWebSearchQuota.String()))
}
// Claude web search tool billing
// NOTE(review): dClaudeWebSearchQuota is written to the log content and to
// `other`, but it is never added to quotaCalculateDecimal in this function —
// confirm whether leaving it out of the charged quota is intentional.
var dClaudeWebSearchQuota decimal.Decimal
var claudeWebSearchPrice float64
claudeWebSearchCallCount := ctx.GetInt("claude_web_search_requests")
if claudeWebSearchCallCount > 0 {
claudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand()
dClaudeWebSearchQuota = decimal.NewFromFloat(claudeWebSearchPrice).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit).Mul(decimal.NewFromInt(int64(claudeWebSearchCallCount)))
extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次,调用花费 %s",
claudeWebSearchCallCount, dClaudeWebSearchQuota.String()))
}
// File search tool billing
var dFileSearchQuota decimal.Decimal
var fileSearchPrice float64
if relayInfo.ResponsesUsageInfo != nil {
if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 {
fileSearchPrice = operation_setting.GetFileSearchPricePerThousand()
dFileSearchQuota = decimal.NewFromFloat(fileSearchPrice).
Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent = append(extraContent, fmt.Sprintf("File Search 调用 %d 次,调用花费 %s",
fileSearchTool.CallCount, dFileSearchQuota.String()))
}
}
// Image generation call billing, priced per call from the quality and size
// stored in the context.
var dImageGenerationCallQuota decimal.Decimal
var imageGenerationCallPrice float64
if ctx.GetBool("image_generation_call") {
imageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size"))
dImageGenerationCallQuota = decimal.NewFromFloat(imageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", dImageGenerationCallQuota.String()))
}
var quotaCalculateDecimal decimal.Decimal
var audioInputQuota decimal.Decimal
var audioInputPrice float64
// Usage semantics are derived from the final request relay format.
isClaudeUsageSemantic := relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude
if !relayInfo.PriceData.UsePrice {
// Ratio-based billing: start from the prompt tokens and peel off the
// categories that are priced with their own ratio.
baseTokens := dPromptTokens
// Subtract cached tokens.
// Anthropic API input_tokens already exclude cached tokens, so nothing is subtracted.
// OpenAI/OpenRouter-style prompt_tokens include cached tokens, so they are subtracted.
var cachedTokensWithRatio decimal.Decimal
if !dCacheTokens.IsZero() {
if !isClaudeUsageSemantic {
baseTokens = baseTokens.Sub(dCacheTokens)
}
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
}
var dCachedCreationTokensWithRatio decimal.Decimal
if !dCachedCreationTokens.IsZero() {
if !isClaudeUsageSemantic {
baseTokens = baseTokens.Sub(dCachedCreationTokens)
}
dCachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCachedCreationRatio)
}
// Subtract image tokens
var imageTokensWithRatio decimal.Decimal
if !dImageTokens.IsZero() {
baseTokens = baseTokens.Sub(dImageTokens)
imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
}
// Subtract Gemini audio tokens
if !dAudioTokens.IsZero() {
audioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(modelName)
if audioInputPrice > 0 {
// Recompute base tokens
baseTokens = baseTokens.Sub(dAudioTokens)
audioInputQuota = decimal.NewFromFloat(audioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", audioInputQuota.String()))
}
}
promptQuota := baseTokens.Add(cachedTokensWithRatio).
Add(imageTokensWithRatio).
Add(dCachedCreationTokensWithRatio)
completionQuota := dCompletionTokens.Mul(dCompletionRatio)
quotaCalculateDecimal = promptQuota.Add(completionQuota).Mul(ratio)
// With a non-zero ratio the token-based quota is floored at 1.
if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) {
quotaCalculateDecimal = decimal.NewFromInt(1)
}
} else {
// Fixed-price billing: model price * quota per unit * group ratio.
quotaCalculateDecimal = dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio)
}
// Add the quota for Responses tool calls
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
// Add the separately billed audio input
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
// Add the image generation call charge
quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
// Every extra ratio multiplies the whole quota, including the tool charges above.
if len(relayInfo.PriceData.OtherRatios) > 0 {
for key, otherRatio := range relayInfo.PriceData.OtherRatios {
dOtherRatio := decimal.NewFromFloat(otherRatio)
quotaCalculateDecimal = quotaCalculateDecimal.Mul(dOtherRatio)
extraContent = append(extraContent, fmt.Sprintf("其他倍率 %s: %f", key, otherRatio))
}
}
quota := int(quotaCalculateDecimal.Round(0).IntPart())
totalTokens := promptTokens + completionTokens
//var logContent string
// record all the consume log even if quota is 0
if totalTokens == 0 {
// in this case, must be some error happened
// we cannot just return, because we may have to return the pre-consumed quota
quota = 0
extraContent = append(extraContent, "上游没有返回计费信息,无法扣费(可能是上游超时)")
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
"tokenId %d, model %s pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
} else {
// Rounding may have produced 0; with a non-zero ratio charge at least 1.
if !ratio.IsZero() && quota == 0 {
quota = 1
}
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
}
// Settlement runs even when quota is 0 (see the comment in the zero-token branch).
if err := service.SettleBilling(ctx, relayInfo, quota); err != nil {
logger.LogError(ctx, "error settling billing: "+err.Error())
}
// Gizmo models are logged under a wildcard name; the concrete model name is
// kept in the log content.
logModel := modelName
if strings.HasPrefix(logModel, "gpt-4-gizmo") {
logModel = "gpt-4-gizmo-*"
extraContent = append(extraContent, fmt.Sprintf("模型 %s", modelName))
}
if strings.HasPrefix(logModel, "gpt-4o-gizmo") {
logModel = "gpt-4o-gizmo-*"
extraContent = append(extraContent, fmt.Sprintf("模型 %s", modelName))
}
logContent := strings.Join(extraContent, ", ")
other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
if adminRejectReason != "" {
other["reject_reason"] = adminRejectReason
}
// Requests whose final format is Claude are tagged so that log rendering can
// treat their input token counts with Anthropic semantics.
if isClaudeUsageSemantic {
other["claude"] = true
other["usage_semantic"] = "anthropic"
}
if imageTokens != 0 {
other["image"] = true
other["image_ratio"] = imageRatio
other["image_output"] = imageTokens
}
if cachedCreationTokens != 0 {
other["cache_creation_tokens"] = cachedCreationTokens
other["cache_creation_ratio"] = cachedCreationRatio
}
// Web search details: the OpenAI charge takes precedence; the Claude values are
// reported only when no OpenAI web search quota was computed.
if !dWebSearchQuota.IsZero() {
if relayInfo.ResponsesUsageInfo != nil {
if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists {
other["web_search"] = true
other["web_search_call_count"] = webSearchTool.CallCount
other["web_search_price"] = webSearchPrice
}
} else if strings.HasSuffix(modelName, "search-preview") {
other["web_search"] = true
other["web_search_call_count"] = 1
other["web_search_price"] = webSearchPrice
}
} else if !dClaudeWebSearchQuota.IsZero() {
other["web_search"] = true
other["web_search_call_count"] = claudeWebSearchCallCount
other["web_search_price"] = claudeWebSearchPrice
}
if !dFileSearchQuota.IsZero() && relayInfo.ResponsesUsageInfo != nil {
if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists {
other["file_search"] = true
other["file_search_call_count"] = fileSearchTool.CallCount
other["file_search_price"] = fileSearchPrice
}
}
if !audioInputQuota.IsZero() {
other["audio_input_seperate_price"] = true
other["audio_input_token_count"] = audioTokens
other["audio_input_price"] = audioInputPrice
}
if !dImageGenerationCallQuota.IsZero() {
other["image_generation_call"] = true
other["image_generation_call_price"] = imageGenerationCallPrice
}
// Persist the consume log with the final quota and the collected details.
model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
ChannelId: relayInfo.ChannelId,
PromptTokens: promptTokens,
CompletionTokens: completionTokens,
ModelName: logModel,
TokenName: tokenName,
Quota: quota,
Content: logContent,
TokenId: relayInfo.TokenId,
UseTimeSeconds: int(useTimeSeconds),
IsStream: relayInfo.IsStream,
Group: relayInfo.UsingGroup,
Other: other,
})
}

View File

@@ -82,6 +82,6 @@ func EmbeddingHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
return newAPIError
}
postConsumeQuota(c, info, usage.(*dto.Usage))
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
return nil
}

View File

@@ -194,7 +194,7 @@ func GeminiHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
return openaiErr
}
postConsumeQuota(c, info, usage.(*dto.Usage))
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
return nil
}
@@ -288,6 +288,6 @@ func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo) (newAPI
return openaiErr
}
postConsumeQuota(c, info, usage.(*dto.Usage))
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
return nil
}

View File

@@ -141,6 +141,6 @@ func ImageHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
logContent = append(logContent, fmt.Sprintf("生成数量 %d", imageN))
}
postConsumeQuota(c, info, usage.(*dto.Usage), logContent...)
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), logContent)
return nil
}

View File

@@ -96,6 +96,6 @@ func RerankHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
return newAPIError
}
postConsumeQuota(c, info, usage.(*dto.Usage))
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
return nil
}

View File

@@ -145,7 +145,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
info.PriceData = originPriceData
return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry())
}
postConsumeQuota(c, info, usageDto)
service.PostTextConsumeQuota(c, info, usageDto, nil)
info.OriginModelName = originModelName
info.PriceData = originPriceData
@@ -155,7 +155,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
service.PostAudioConsumeQuota(c, info, usageDto, "")
} else {
postConsumeQuota(c, info, usageDto)
service.PostTextConsumeQuota(c, info, usageDto, nil)
}
return nil
}

View File

@@ -223,6 +223,25 @@ func generateStopBlock(index int) *dto.ClaudeResponse {
}
}
// buildClaudeUsageFromOpenAIUsage maps an OpenAI-style usage onto a
// dto.ClaudeUsage.
//
// Prompt, completion, cache-creation and cache-read counters are copied over
// one to one. The CacheCreation breakdown is attached only when at least one of
// the 5m / 1h cache-creation counters is positive. A nil input yields nil.
func buildClaudeUsageFromOpenAIUsage(oaiUsage *dto.Usage) *dto.ClaudeUsage {
	if oaiUsage == nil {
		return nil
	}

	converted := &dto.ClaudeUsage{
		InputTokens:              oaiUsage.PromptTokens,
		OutputTokens:             oaiUsage.CompletionTokens,
		CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
		CacheReadInputTokens:     oaiUsage.PromptTokensDetails.CachedTokens,
	}

	fiveMinute := oaiUsage.ClaudeCacheCreation5mTokens
	oneHour := oaiUsage.ClaudeCacheCreation1hTokens
	if fiveMinute <= 0 && oneHour <= 0 {
		// No split reported; leave CacheCreation unset.
		return converted
	}

	converted.CacheCreation = &dto.ClaudeCacheCreationUsage{
		Ephemeral5mInputTokens: fiveMinute,
		Ephemeral1hInputTokens: oneHour,
	}
	return converted
}
func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
if info.ClaudeConvertInfo.Done {
return nil
@@ -391,13 +410,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
}
if oaiUsage != nil {
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Type: "message_delta",
Usage: &dto.ClaudeUsage{
InputTokens: oaiUsage.PromptTokens,
OutputTokens: oaiUsage.CompletionTokens,
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
},
Type: "message_delta",
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
Delta: &dto.ClaudeMediaMessage{
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
},
@@ -419,13 +433,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
oaiUsage := info.ClaudeConvertInfo.Usage
if oaiUsage != nil {
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Type: "message_delta",
Usage: &dto.ClaudeUsage{
InputTokens: oaiUsage.PromptTokens,
OutputTokens: oaiUsage.CompletionTokens,
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
},
Type: "message_delta",
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
Delta: &dto.ClaudeMediaMessage{
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
},
@@ -555,13 +564,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
}
if oaiUsage != nil {
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
Type: "message_delta",
Usage: &dto.ClaudeUsage{
InputTokens: oaiUsage.PromptTokens,
OutputTokens: oaiUsage.CompletionTokens,
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
},
Type: "message_delta",
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
Delta: &dto.ClaudeMediaMessage{
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
},

View File

@@ -73,6 +73,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
other["admin_info"] = adminInfo
appendRequestPath(ctx, relayInfo, other)
appendRequestConversionChain(relayInfo, other)
appendFinalRequestFormat(relayInfo, other)
appendBillingInfo(relayInfo, other)
appendParamOverrideInfo(relayInfo, other)
return other
@@ -167,6 +168,17 @@ func appendRequestConversionChain(relayInfo *relaycommon.RelayInfo, other map[st
other["request_conversion"] = chain
}
// appendFinalRequestFormat sets other["claude"] = true when the final upstream
// request format reported by relayInfo is Claude Messages. Frontend log
// rendering uses this flag to keep the original Claude input display.
// It does nothing when relayInfo or other is nil.
func appendFinalRequestFormat(relayInfo *relaycommon.RelayInfo, other map[string]interface{}) {
	if relayInfo != nil && other != nil && relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
		other["claude"] = true
	}
}
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice, userGroupRatio float64) map[string]interface{} {
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice, userGroupRatio)
info["ws"] = true

View File

@@ -235,108 +235,6 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
})
}
// PostClaudeConsumeQuota computes the quota owed for a Claude-format request,
// settles billing through SettleBilling, and records a consume log.
//
// Parameters:
//   - ctx: gin context; read for "token_name" and passed to logging/billing helpers.
//   - relayInfo: supplies pricing (PriceData), channel type, identifiers and the
//     request start time.
//   - usage: upstream usage. When nil, usage is estimated from
//     relayInfo.GetEstimatePromptTokens() instead of being dereferenced, and a
//     note is written to the log content. The affinity usage cache is only
//     updated with real upstream usage.
//
// Ratio-based quota is: (prompt + cacheRead*cacheRatio + 5m*ratio5m + 1h*ratio1h
// + remainingCacheCreation*cacheCreationRatio + completion*completionRatio)
// * groupRatio * modelRatio. With fixed pricing it is
// modelPrice * QuotaPerUnit * groupRatio.
func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) {
	var logContent string
	if usage == nil {
		// No upstream usage: fall back to the estimated prompt tokens so the
		// field reads below cannot panic on a nil pointer.
		estimatedPromptTokens := relayInfo.GetEstimatePromptTokens()
		usage = &dto.Usage{
			PromptTokens:     estimatedPromptTokens,
			CompletionTokens: 0,
			TotalTokens:      estimatedPromptTokens,
		}
		logContent = "上游无计费信息"
	} else {
		ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
	}

	useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
	promptTokens := usage.PromptTokens
	completionTokens := usage.CompletionTokens
	modelName := relayInfo.OriginModelName
	tokenName := ctx.GetString("token_name")

	completionRatio := relayInfo.PriceData.CompletionRatio
	modelRatio := relayInfo.PriceData.ModelRatio
	groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio
	modelPrice := relayInfo.PriceData.ModelPrice

	cacheRatio := relayInfo.PriceData.CacheRatio
	cacheTokens := usage.PromptTokensDetails.CachedTokens
	cacheCreationRatio := relayInfo.PriceData.CacheCreationRatio
	cacheCreationRatio5m := relayInfo.PriceData.CacheCreation5mRatio
	cacheCreationRatio1h := relayInfo.PriceData.CacheCreation1hRatio
	cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
	cacheCreationTokens5m := usage.ClaudeCacheCreation5mTokens
	cacheCreationTokens1h := usage.ClaudeCacheCreation1hTokens

	if relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
		// For OpenRouter channels the cached and cache-creation tokens are
		// subtracted from the prompt count before it is billed at the base rate.
		promptTokens -= cacheTokens
		isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(modelName, relayInfo.PriceData.ModelRatio)
		if cacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
			// No cache-creation count reported: derive one from the reported
			// cost, accepting it only when it fits inside the prompt tokens.
			maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData)
			if maybeCacheCreationTokens >= 0 && promptTokens >= maybeCacheCreationTokens {
				cacheCreationTokens = maybeCacheCreationTokens
			}
		}
		promptTokens -= cacheCreationTokens
	}

	calculateQuota := 0.0
	if !relayInfo.PriceData.UsePrice {
		calculateQuota = float64(promptTokens)
		calculateQuota += float64(cacheTokens) * cacheRatio
		calculateQuota += float64(cacheCreationTokens5m) * cacheCreationRatio5m
		calculateQuota += float64(cacheCreationTokens1h) * cacheCreationRatio1h
		// Cache-creation tokens not covered by the 5m/1h split use the generic ratio.
		remainingCacheCreationTokens := cacheCreationTokens - cacheCreationTokens5m - cacheCreationTokens1h
		if remainingCacheCreationTokens > 0 {
			calculateQuota += float64(remainingCacheCreationTokens) * cacheCreationRatio
		}
		calculateQuota += float64(completionTokens) * completionRatio
		calculateQuota = calculateQuota * groupRatio * modelRatio
	} else {
		calculateQuota = modelPrice * common.QuotaPerUnit * groupRatio
	}

	// With a non-zero model ratio the quota is floored at 1.
	if modelRatio != 0 && calculateQuota <= 0 {
		calculateQuota = 1
	}

	quota := int(calculateQuota)

	totalTokens := promptTokens + completionTokens

	// record all the consume log even if quota is 0
	if totalTokens == 0 {
		// in this case, must be some error happened
		// we cannot just return, because we may have to return the pre-consumed quota
		quota = 0
		logContent += "(可能是上游出错)"
		logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
			"tokenId %d, model %s pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
	} else {
		model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
		model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
	}

	if err := SettleBilling(ctx, relayInfo, quota); err != nil {
		logger.LogError(ctx, "error settling billing: "+err.Error())
	}

	other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio,
		cacheTokens, cacheRatio,
		cacheCreationTokens, cacheCreationRatio,
		cacheCreationTokens5m, cacheCreationRatio5m,
		cacheCreationTokens1h, cacheCreationRatio1h,
		modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
	model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
		ChannelId:        relayInfo.ChannelId,
		PromptTokens:     promptTokens,
		CompletionTokens: completionTokens,
		ModelName:        modelName,
		TokenName:        tokenName,
		Quota:            quota,
		Content:          logContent,
		TokenId:          relayInfo.TokenId,
		UseTimeSeconds:   int(useTimeSeconds),
		IsStream:         relayInfo.IsStream,
		Group:            relayInfo.UsingGroup,
		Other:            other,
	})
}
func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData types.PriceData) int {
if priceData.CacheCreationRatio == 1 {
return 0

427
service/text_quota.go Normal file
View File

@@ -0,0 +1,427 @@
package service
import (
"fmt"
"strings"
"time"
"github.com/QuantumNous/new-api/common"
"github.com/QuantumNous/new-api/constant"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/logger"
"github.com/QuantumNous/new-api/model"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
"github.com/shopspring/decimal"
)
// textQuotaSummary holds the inputs and the result of one text billing
// calculation. calculateTextQuotaSummary fills it in; PostTextConsumeQuota
// reads it to settle billing and to build the consume log entry.
type textQuotaSummary struct {
// PromptTokens starts as usage.PromptTokens; on OpenRouter channels the cache
// read and cache creation tokens are subtracted from it.
PromptTokens int
// CompletionTokens is usage.CompletionTokens.
CompletionTokens int
// TotalTokens is usage.PromptTokens + usage.CompletionTokens, taken before
// any OpenRouter adjustment. A value of 0 forces Quota to 0.
TotalTokens int
// CacheTokens is the cached (cache read) prompt token count.
CacheTokens int
// CacheCreationTokens is the aggregate cache creation count; on OpenRouter
// it may be replaced by the result of CalcOpenRouterCacheCreateTokens.
CacheCreationTokens int
// CacheCreationTokens5m and CacheCreationTokens1h are the split cache
// creation counts copied from usage.ClaudeCacheCreation5mTokens and
// usage.ClaudeCacheCreation1hTokens.
CacheCreationTokens5m int
CacheCreationTokens1h int
// ImageTokens and AudioTokens come from usage.PromptTokensDetails.
ImageTokens int
AudioTokens int
// ModelName is relayInfo.OriginModelName.
ModelName string
// TokenName is the "token_name" string stored on the gin context.
TokenName string
// UseTimeSeconds is the number of seconds elapsed since relayInfo.StartTime.
UseTimeSeconds int64
// The ratios and ModelPrice below are copied from relayInfo.PriceData.
CompletionRatio float64
CacheRatio float64
ImageRatio float64
ModelRatio float64
GroupRatio float64
ModelPrice float64
CacheCreationRatio float64
CacheCreationRatio5m float64
CacheCreationRatio1h float64
// Quota is the final amount to charge.
Quota int
// IsClaudeUsageSemantic is true exactly when UsageSemantic == "anthropic".
IsClaudeUsageSemantic bool
// UsageSemantic is the value returned by usageSemanticFromUsage.
UsageSemantic string
// Built-in tool pricing. The web/file search prices are divided by 1000 and
// multiplied by the call count when the quota is computed.
WebSearchPrice float64
WebSearchCallCount int
ClaudeWebSearchPrice float64
ClaudeWebSearchCallCount int
FileSearchPrice float64
FileSearchCallCount int
// AudioInputPrice is divided by 1,000,000 and multiplied by AudioTokens.
AudioInputPrice float64
// ImageGenerationCallPrice is charged once when the context flag
// "image_generation_call" is set.
ImageGenerationCallPrice float64
}
// cacheWriteTokensTotal returns the cache creation total to display for a
// summary. Without any positive split (5m/1h) count it is the aggregate
// CacheCreationTokens; otherwise it is the larger of the aggregate and the
// sum of the two split counts.
func cacheWriteTokensTotal(summary textQuotaSummary) int {
	aggregate := summary.CacheCreationTokens
	if summary.CacheCreationTokens5m <= 0 && summary.CacheCreationTokens1h <= 0 {
		return aggregate
	}
	if split := summary.CacheCreationTokens5m + summary.CacheCreationTokens1h; split >= aggregate {
		return split
	}
	return aggregate
}
// isLegacyClaudeDerivedOpenAIUsage reports whether usage carries split Claude
// cache creation counts (5m or 1h) without being tagged with a usage source or
// usage semantic, on a request whose final relay format is not Claude.
// It returns false when either argument is nil.
func isLegacyClaudeDerivedOpenAIUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) bool {
	switch {
	case relayInfo == nil, usage == nil:
		return false
	case relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude:
		// Native Claude requests are handled by the Anthropic semantic path.
		return false
	case usage.UsageSource != "", usage.UsageSemantic != "":
		// Tagged usage is not legacy: its semantic is stated explicitly.
		return false
	}
	return usage.ClaudeCacheCreation5mTokens > 0 || usage.ClaudeCacheCreation1hTokens > 0
}
// calculateTextQuotaSummary computes the quota to charge for a text request
// and returns it together with every intermediate value needed for logging.
//
// Pricing ratios are copied from relayInfo.PriceData. Token counts are read
// from usage; when usage is nil the estimated prompt token count from
// relayInfo is used with zero completion tokens. Built-in tool calls (web
// search, Claude web search, file search, image generation) are priced
// separately and added on top of the token-based or fixed-price quota.
// relayInfo and ctx are dereferenced unconditionally and must be non-nil.
func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) textQuotaSummary {
summary := textQuotaSummary{
ModelName: relayInfo.OriginModelName,
TokenName: ctx.GetString("token_name"),
UseTimeSeconds: time.Now().Unix() - relayInfo.StartTime.Unix(),
CompletionRatio: relayInfo.PriceData.CompletionRatio,
CacheRatio: relayInfo.PriceData.CacheRatio,
ImageRatio: relayInfo.PriceData.ImageRatio,
ModelRatio: relayInfo.PriceData.ModelRatio,
GroupRatio: relayInfo.PriceData.GroupRatioInfo.GroupRatio,
ModelPrice: relayInfo.PriceData.ModelPrice,
CacheCreationRatio: relayInfo.PriceData.CacheCreationRatio,
CacheCreationRatio5m: relayInfo.PriceData.CacheCreation5mRatio,
CacheCreationRatio1h: relayInfo.PriceData.CacheCreation1hRatio,
UsageSemantic: usageSemanticFromUsage(relayInfo, usage),
}
summary.IsClaudeUsageSemantic = summary.UsageSemantic == "anthropic"
// No upstream usage: bill on the estimated prompt tokens only.
if usage == nil {
usage = &dto.Usage{
PromptTokens: relayInfo.GetEstimatePromptTokens(),
CompletionTokens: 0,
TotalTokens: relayInfo.GetEstimatePromptTokens(),
}
}
summary.PromptTokens = usage.PromptTokens
summary.CompletionTokens = usage.CompletionTokens
// TotalTokens is recomputed from prompt + completion; usage.TotalTokens is not read.
summary.TotalTokens = usage.PromptTokens + usage.CompletionTokens
summary.CacheTokens = usage.PromptTokensDetails.CachedTokens
summary.CacheCreationTokens = usage.PromptTokensDetails.CachedCreationTokens
summary.CacheCreationTokens5m = usage.ClaudeCacheCreation5mTokens
summary.CacheCreationTokens1h = usage.ClaudeCacheCreation1hTokens
summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
// OpenRouter: remove cache read/creation tokens from the prompt count, and
// estimate the cache creation count from the reported cost when it is absent
// and no custom price or custom model ratio is in effect.
// NOTE(review): when the usage semantic is not "anthropic" and the usage is
// not legacy Claude-derived, the cache tokens are subtracted again from
// baseTokens further down; confirm the double subtraction is intended for
// OpenRouter channels.
if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
summary.PromptTokens -= summary.CacheTokens
isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData)
if maybeCacheCreationTokens >= 0 && summary.PromptTokens >= maybeCacheCreationTokens {
summary.CacheCreationTokens = maybeCacheCreationTokens
}
}
summary.PromptTokens -= summary.CacheCreationTokens
}
// Convert every operand to decimal so the quota arithmetic is exact.
dPromptTokens := decimal.NewFromInt(int64(summary.PromptTokens))
dCacheTokens := decimal.NewFromInt(int64(summary.CacheTokens))
dImageTokens := decimal.NewFromInt(int64(summary.ImageTokens))
dAudioTokens := decimal.NewFromInt(int64(summary.AudioTokens))
dCompletionTokens := decimal.NewFromInt(int64(summary.CompletionTokens))
dCachedCreationTokens := decimal.NewFromInt(int64(summary.CacheCreationTokens))
dCompletionRatio := decimal.NewFromFloat(summary.CompletionRatio)
dCacheRatio := decimal.NewFromFloat(summary.CacheRatio)
dImageRatio := decimal.NewFromFloat(summary.ImageRatio)
dModelRatio := decimal.NewFromFloat(summary.ModelRatio)
dGroupRatio := decimal.NewFromFloat(summary.GroupRatio)
dModelPrice := decimal.NewFromFloat(summary.ModelPrice)
dCacheCreationRatio := decimal.NewFromFloat(summary.CacheCreationRatio)
dCacheCreationRatio5m := decimal.NewFromFloat(summary.CacheCreationRatio5m)
dCacheCreationRatio1h := decimal.NewFromFloat(summary.CacheCreationRatio1h)
dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
// ratio is the combined model * group multiplier applied to token quota.
ratio := dModelRatio.Mul(dGroupRatio)
// Web search: priced per call count from the Responses API tool usage, or as
// a single call for models whose name ends in "search-preview".
var dWebSearchQuota decimal.Decimal
if relayInfo.ResponsesUsageInfo != nil {
if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 {
summary.WebSearchCallCount = webSearchTool.CallCount
summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, webSearchTool.SearchContextSize)
dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice).
Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
}
} else if strings.HasSuffix(summary.ModelName, "search-preview") {
searchContextSize := ctx.GetString("chat_completion_web_search_context_size")
// Default context size when the request did not record one.
if searchContextSize == "" {
searchContextSize = "medium"
}
summary.WebSearchCallCount = 1
summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, searchContextSize)
dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
}
// Claude web search: the request count is read from the gin context.
var dClaudeWebSearchQuota decimal.Decimal
summary.ClaudeWebSearchCallCount = ctx.GetInt("claude_web_search_requests")
if summary.ClaudeWebSearchCallCount > 0 {
summary.ClaudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand()
dClaudeWebSearchQuota = decimal.NewFromFloat(summary.ClaudeWebSearchPrice).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit).
Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount)))
}
// File search: priced per call count from the Responses API tool usage.
var dFileSearchQuota decimal.Decimal
if relayInfo.ResponsesUsageInfo != nil {
if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 {
summary.FileSearchCallCount = fileSearchTool.CallCount
summary.FileSearchPrice = operation_setting.GetFileSearchPricePerThousand()
dFileSearchQuota = decimal.NewFromFloat(summary.FileSearchPrice).
Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
}
}
// Image generation call: a single charge keyed by quality and size.
var dImageGenerationCallQuota decimal.Decimal
if ctx.GetBool("image_generation_call") {
summary.ImageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size"))
dImageGenerationCallQuota = decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit)
}
// audioInputQuota is only assigned in the ratio-based branch below.
var audioInputQuota decimal.Decimal
if !relayInfo.PriceData.UsePrice {
// Ratio-based billing: start from the prompt tokens and move each specially
// priced category out of baseTokens into its own weighted term.
baseTokens := dPromptTokens
var cachedTokensWithRatio decimal.Decimal
if !dCacheTokens.IsZero() {
// For Anthropic semantic and legacy Claude-derived usage the cached tokens
// are not subtracted from the prompt count here.
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
baseTokens = baseTokens.Sub(dCacheTokens)
}
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
}
var cachedCreationTokensWithRatio decimal.Decimal
hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0
if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens {
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
// Non-Anthropic semantic: the aggregate is billed at the single cache
// creation ratio; the split 5m/1h counts are not used.
baseTokens = baseTokens.Sub(dCachedCreationTokens)
cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio)
} else {
// Anthropic / legacy path: bill the 5m and 1h counts at their own ratios
// and whatever is left of the aggregate at the generic ratio.
remaining := summary.CacheCreationTokens - summary.CacheCreationTokens5m - summary.CacheCreationTokens1h
if remaining < 0 {
remaining = 0
}
cachedCreationTokensWithRatio = decimal.NewFromInt(int64(remaining)).Mul(dCacheCreationRatio)
cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens5m)).Mul(dCacheCreationRatio5m))
cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens1h)).Mul(dCacheCreationRatio1h))
}
}
var imageTokensWithRatio decimal.Decimal
if !dImageTokens.IsZero() {
baseTokens = baseTokens.Sub(dImageTokens)
imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
}
if !dAudioTokens.IsZero() {
summary.AudioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(summary.ModelName)
// Audio tokens are only billed separately when a positive price is configured.
if summary.AudioInputPrice > 0 {
baseTokens = baseTokens.Sub(dAudioTokens)
audioInputQuota = decimal.NewFromFloat(summary.AudioInputPrice).
Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
}
}
promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio).Add(cachedCreationTokensWithRatio)
completionQuota := dCompletionTokens.Mul(dCompletionRatio)
quotaCalculateDecimal := promptQuota.Add(completionQuota).Mul(ratio)
// Tool and audio charges are added after the model/group ratio is applied.
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
// Every extra ratio multiplies the whole quota, tool charges included.
if len(relayInfo.PriceData.OtherRatios) > 0 {
for _, otherRatio := range relayInfo.PriceData.OtherRatios {
quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio))
}
}
// A non-free model never bills zero or a negative amount.
if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) {
quotaCalculateDecimal = decimal.NewFromInt(1)
}
summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart())
} else {
// Fixed-price billing: model price * quota per unit * group ratio, plus the
// tool charges. audioInputQuota is still its zero value on this branch.
quotaCalculateDecimal := dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
if len(relayInfo.PriceData.OtherRatios) > 0 {
for _, otherRatio := range relayInfo.PriceData.OtherRatios {
quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio))
}
}
summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart())
}
// Zero total tokens means nothing is charged; otherwise a non-zero
// model*group ratio guarantees a minimum charge of 1.
if summary.TotalTokens == 0 {
summary.Quota = 0
} else if !ratio.IsZero() && summary.Quota == 0 {
summary.Quota = 1
}
return summary
}
// usageSemanticFromUsage decides which usage semantic applies to a request.
// An explicit usage.UsageSemantic always wins; otherwise the result is
// "anthropic" when the final request relay format is Claude and "openai" in
// every other case, including when both arguments are nil.
func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) string {
	if usage != nil {
		if semantic := usage.UsageSemantic; semantic != "" {
			return semantic
		}
	}
	if relayInfo == nil || relayInfo.GetFinalRequestRelayFormat() != types.RelayFormatClaude {
		return "openai"
	}
	return "anthropic"
}
// PostTextConsumeQuota settles billing for a finished text request and
// records the consume log.
//
// It computes the quota with calculateTextQuotaSummary, updates the user and
// channel usage counters (skipped when no tokens were counted), calls
// SettleBilling, and writes a log entry whose Content is extraContent joined
// by ", " and whose Other map carries the pricing details. usage may be nil,
// in which case a note is appended to the log content and the summary falls
// back to estimated prompt tokens. extraContent may be nil.
func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent []string) {
originUsage := usage
if usage == nil {
extraContent = append(extraContent, "上游无计费信息")
}
// Only real upstream usage is reported to the channel affinity cache observer.
if originUsage != nil {
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
}
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
// Human-readable cost notes for each separately priced item; each formula
// repeats the corresponding computation in calculateTextQuotaSummary.
if summary.WebSearchCallCount > 0 {
extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次,调用花费 %s", summary.WebSearchCallCount, decimal.NewFromFloat(summary.WebSearchPrice).Mul(decimal.NewFromInt(int64(summary.WebSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
}
if summary.ClaudeWebSearchCallCount > 0 {
extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次,调用花费 %s", summary.ClaudeWebSearchCallCount, decimal.NewFromFloat(summary.ClaudeWebSearchPrice).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount))).String()))
}
if summary.FileSearchCallCount > 0 {
extraContent = append(extraContent, fmt.Sprintf("File Search 调用 %d 次,调用花费 %s", summary.FileSearchCallCount, decimal.NewFromFloat(summary.FileSearchPrice).Mul(decimal.NewFromInt(int64(summary.FileSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
}
if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 {
extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", decimal.NewFromFloat(summary.AudioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(decimal.NewFromInt(int64(summary.AudioTokens))).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
}
if summary.ImageGenerationCallPrice > 0 {
extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
}
// With zero total tokens nothing is added to the usage counters, but
// SettleBilling below still runs with the (zero) quota.
if summary.TotalTokens == 0 {
extraContent = append(extraContent, "上游没有返回计费信息,无法扣费(可能是上游超时)")
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, tokenId %d, model %s pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, summary.ModelName, relayInfo.FinalPreConsumedQuota))
} else {
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, summary.Quota)
model.UpdateChannelUsedQuota(relayInfo.ChannelId, summary.Quota)
}
// A settlement failure is logged and does not stop the consume log from being written.
if err := SettleBilling(ctx, relayInfo, summary.Quota); err != nil {
logger.LogError(ctx, "error settling billing: "+err.Error())
}
// Gizmo models are logged under a wildcard name; the concrete model name is
// kept in the log content instead.
logModel := summary.ModelName
if strings.HasPrefix(logModel, "gpt-4-gizmo") {
logModel = "gpt-4-gizmo-*"
extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName))
}
if strings.HasPrefix(logModel, "gpt-4o-gizmo") {
logModel = "gpt-4o-gizmo-*"
extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName))
}
logContent := strings.Join(extraContent, ", ")
// Build the structured "other" payload with the generator matching the usage semantic.
var other map[string]interface{}
if summary.IsClaudeUsageSemantic {
other = GenerateClaudeOtherInfo(ctx, relayInfo,
summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio,
summary.CacheTokens, summary.CacheRatio,
summary.CacheCreationTokens, summary.CacheCreationRatio,
summary.CacheCreationTokens5m, summary.CacheCreationRatio5m,
summary.CacheCreationTokens1h, summary.CacheCreationRatio1h,
summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
other["usage_semantic"] = "anthropic"
} else {
other = GenerateTextOtherInfo(ctx, relayInfo, summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio, summary.CacheTokens, summary.CacheRatio, summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
}
if adminRejectReason != "" {
other["reject_reason"] = adminRejectReason
}
if summary.ImageTokens != 0 {
other["image"] = true
other["image_ratio"] = summary.ImageRatio
other["image_output"] = summary.ImageTokens
}
// The web_search_* keys are shared: Claude web search details are written
// only when no Responses/search-preview web search was counted.
if summary.WebSearchCallCount > 0 {
other["web_search"] = true
other["web_search_call_count"] = summary.WebSearchCallCount
other["web_search_price"] = summary.WebSearchPrice
} else if summary.ClaudeWebSearchCallCount > 0 {
other["web_search"] = true
other["web_search_call_count"] = summary.ClaudeWebSearchCallCount
other["web_search_price"] = summary.ClaudeWebSearchPrice
}
if summary.FileSearchCallCount > 0 {
other["file_search"] = true
other["file_search_call_count"] = summary.FileSearchCallCount
other["file_search_price"] = summary.FileSearchPrice
}
if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 {
other["audio_input_seperate_price"] = true
other["audio_input_token_count"] = summary.AudioTokens
other["audio_input_price"] = summary.AudioInputPrice
}
if summary.ImageGenerationCallPrice > 0 {
other["image_generation_call"] = true
other["image_generation_call_price"] = summary.ImageGenerationCallPrice
}
if summary.CacheCreationTokens > 0 {
other["cache_creation_tokens"] = summary.CacheCreationTokens
other["cache_creation_ratio"] = summary.CacheCreationRatio
}
if summary.CacheCreationTokens5m > 0 {
other["cache_creation_tokens_5m"] = summary.CacheCreationTokens5m
other["cache_creation_ratio_5m"] = summary.CacheCreationRatio5m
}
if summary.CacheCreationTokens1h > 0 {
other["cache_creation_tokens_1h"] = summary.CacheCreationTokens1h
other["cache_creation_ratio_1h"] = summary.CacheCreationRatio1h
}
cacheWriteTokens := cacheWriteTokensTotal(summary)
if cacheWriteTokens > 0 {
// cache_write_tokens: normalized cache creation total for UI display.
// If split 5m/1h values are present, this is the larger of their sum and
// cache_creation_tokens; otherwise it falls back to cache_creation_tokens.
other["cache_write_tokens"] = cacheWriteTokens
}
if relayInfo.GetFinalRequestRelayFormat() != types.RelayFormatClaude && usage != nil && usage.UsageSource != "" && usage.InputTokens > 0 {
// input_tokens_total: explicit normalized total input used by the usage log UI.
// Only write this field when upstream/current conversion has already provided a
// reliable total input value and tagged the usage source. Do not infer it from
// prompt/cache fields here, otherwise old upstream payloads may be double-counted.
other["input_tokens_total"] = usage.InputTokens
}
model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
ChannelId: relayInfo.ChannelId,
PromptTokens: summary.PromptTokens,
CompletionTokens: summary.CompletionTokens,
ModelName: logModel,
TokenName: summary.TokenName,
Quota: summary.Quota,
Content: logContent,
TokenId: relayInfo.TokenId,
UseTimeSeconds: int(summary.UseTimeSeconds),
IsStream: relayInfo.IsStream,
Group: relayInfo.UsingGroup,
Other: other,
})
}

206
service/text_quota_test.go Normal file
View File

@@ -0,0 +1,206 @@
package service
import (
"net/http/httptest"
"testing"
"time"
"github.com/QuantumNous/new-api/dto"
relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/require"
)
// TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic verifies that one
// usage payload yields the same quota whether the client used the OpenAI chat
// format or the Claude messages format, provided the final request format is
// Claude in both cases.
func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:           1,
		CompletionRatio:      2,
		CacheRatio:           0.1,
		CacheCreationRatio:   1.25,
		CacheCreation5mRatio: 1.25,
		CacheCreation1hRatio: 2,
		GroupRatioInfo: types.GroupRatioInfo{
			GroupRatio: 1,
		},
	}
	upstreamUsage := &dto.Usage{
		PromptTokens:     1000,
		CompletionTokens: 200,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens:         100,
			CachedCreationTokens: 50,
		},
		ClaudeCacheCreation5mTokens: 10,
		ClaudeCacheCreation1hTokens: 20,
	}

	// Client spoke OpenAI chat completions, request was converted to Claude.
	viaChat := &relaycommon.RelayInfo{
		RelayFormat:             types.RelayFormatOpenAI,
		FinalRequestRelayFormat: types.RelayFormatClaude,
		OriginModelName:         "claude-3-7-sonnet",
		PriceData:               pricing,
		StartTime:               time.Now(),
	}
	// Client spoke Claude messages natively.
	viaMessages := &relaycommon.RelayInfo{
		RelayFormat:             types.RelayFormatClaude,
		FinalRequestRelayFormat: types.RelayFormatClaude,
		OriginModelName:         "claude-3-7-sonnet",
		PriceData:               pricing,
		StartTime:               time.Now(),
	}

	gotChat := calculateTextQuotaSummary(ctx, viaChat, upstreamUsage)
	gotMessages := calculateTextQuotaSummary(ctx, viaMessages, upstreamUsage)

	require.Equal(t, gotMessages.Quota, gotChat.Quota)
	require.Equal(t, gotMessages.CacheCreationTokens5m, gotChat.CacheCreationTokens5m)
	require.Equal(t, gotMessages.CacheCreationTokens1h, gotChat.CacheCreationTokens1h)
	require.True(t, gotChat.IsClaudeUsageSemantic)
	// 1000 + 100*0.1 + (20*1.25 + 10*1.25 + 20*2) + 200*2 = 1487.5, rounded to 1488.
	require.Equal(t, 1488, gotChat.Quota)
}
// TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios verifies
// that, under the Claude usage semantic, the 5m and 1h cache creation counts
// are billed at their own ratios and only the remainder of the aggregate is
// billed at the generic cache creation ratio.
func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

	pricing := types.PriceData{
		ModelRatio:           1,
		CompletionRatio:      1,
		CacheRatio:           0,
		CacheCreationRatio:   1,
		CacheCreation5mRatio: 2,
		CacheCreation1hRatio: 3,
		GroupRatioInfo: types.GroupRatioInfo{
			GroupRatio: 1,
		},
	}
	info := &relaycommon.RelayInfo{
		RelayFormat:             types.RelayFormatOpenAI,
		FinalRequestRelayFormat: types.RelayFormatClaude,
		OriginModelName:         "claude-3-7-sonnet",
		PriceData:               pricing,
		StartTime:               time.Now(),
	}
	splitUsage := &dto.Usage{
		PromptTokens:     100,
		CompletionTokens: 0,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedCreationTokens: 10,
		},
		ClaudeCacheCreation5mTokens: 2,
		ClaudeCacheCreation1hTokens: 3,
	}

	got := calculateTextQuotaSummary(ctx, info, splitUsage)

	// 100 prompt + remainder (10-2-3)*1 + 2*2 (5m) + 3*3 (1h) = 118.
	require.Equal(t, 118, got.Quota)
}
// TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage
// verifies that a usage payload tagged UsageSemantic "anthropic" is billed
// with the Claude rules even though the relay info only declares the OpenAI
// relay format.
func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

	taggedUsage := &dto.Usage{
		PromptTokens:     1000,
		CompletionTokens: 200,
		UsageSemantic:    "anthropic",
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens:         100,
			CachedCreationTokens: 50,
		},
		ClaudeCacheCreation5mTokens: 10,
		ClaudeCacheCreation1hTokens: 20,
	}
	info := &relaycommon.RelayInfo{
		RelayFormat:     types.RelayFormatOpenAI,
		OriginModelName: "claude-3-7-sonnet",
		PriceData: types.PriceData{
			ModelRatio:           1,
			CompletionRatio:      2,
			CacheRatio:           0.1,
			CacheCreationRatio:   1.25,
			CacheCreation5mRatio: 1.25,
			CacheCreation1hRatio: 2,
			GroupRatioInfo: types.GroupRatioInfo{
				GroupRatio: 1,
			},
		},
		StartTime: time.Now(),
	}

	got := calculateTextQuotaSummary(ctx, info, taggedUsage)

	require.True(t, got.IsClaudeUsageSemantic)
	require.Equal(t, "anthropic", got.UsageSemantic)
	// 1000 + 100*0.1 + (20*1.25 + 10*1.25 + 20*2) + 200*2 = 1487.5, rounded to 1488.
	require.Equal(t, 1488, got.Quota)
}
// TestCacheWriteTokensTotal covers the three shapes of cache creation data:
// aggregate plus split counts, aggregate only, and split counts only.
func TestCacheWriteTokensTotal(t *testing.T) {
	cases := []struct {
		name    string
		summary textQuotaSummary
		want    int
	}{
		{
			// The aggregate (50) exceeds the split sum (30), so the aggregate wins.
			name: "split cache creation",
			summary: textQuotaSummary{
				CacheCreationTokens:   50,
				CacheCreationTokens5m: 10,
				CacheCreationTokens1h: 20,
			},
			want: 50,
		},
		{
			name:    "legacy cache creation",
			summary: textQuotaSummary{CacheCreationTokens: 50},
			want:    50,
		},
		{
			// No aggregate reported: the total is the sum of the split counts.
			name: "split cache creation without aggregate remainder",
			summary: textQuotaSummary{
				CacheCreationTokens5m: 10,
				CacheCreationTokens1h: 20,
			},
			want: 30,
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			require.Equal(t, tc.want, cacheWriteTokensTotal(tc.summary))
		})
	}
}
// TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage verifies
// billing for untagged usage on an OpenAI-format relay that still carries a
// split Claude cache creation count: cached tokens are not subtracted from
// the prompt tokens and the 5m count is billed at its own ratio.
func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

	legacyUsage := &dto.Usage{
		PromptTokens:     62,
		CompletionTokens: 95,
		PromptTokensDetails: dto.InputTokenDetails{
			CachedTokens: 3544,
		},
		ClaudeCacheCreation5mTokens: 586,
	}
	info := &relaycommon.RelayInfo{
		RelayFormat:     types.RelayFormatOpenAI,
		OriginModelName: "claude-3-7-sonnet",
		PriceData: types.PriceData{
			ModelRatio:           1,
			CompletionRatio:      5,
			CacheRatio:           0.1,
			CacheCreationRatio:   1.25,
			CacheCreation5mRatio: 1.25,
			CacheCreation1hRatio: 2,
			GroupRatioInfo:       types.GroupRatioInfo{GroupRatio: 1},
		},
		StartTime: time.Now(),
	}

	got := calculateTextQuotaSummary(ctx, info, legacyUsage)

	// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1623.9, rounded to 1624.
	require.Equal(t, 1624, got.Quota)
}