Merge pull request #3400 from seefs001/fix/openai-usage
refactor: optimize billing flow for OpenAI-to-Anthropic convert
This commit is contained in:
@@ -223,6 +223,25 @@ func generateStopBlock(index int) *dto.ClaudeResponse {
|
||||
}
|
||||
}
|
||||
|
||||
func buildClaudeUsageFromOpenAIUsage(oaiUsage *dto.Usage) *dto.ClaudeUsage {
|
||||
if oaiUsage == nil {
|
||||
return nil
|
||||
}
|
||||
usage := &dto.ClaudeUsage{
|
||||
InputTokens: oaiUsage.PromptTokens,
|
||||
OutputTokens: oaiUsage.CompletionTokens,
|
||||
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
||||
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
||||
}
|
||||
if oaiUsage.ClaudeCacheCreation5mTokens > 0 || oaiUsage.ClaudeCacheCreation1hTokens > 0 {
|
||||
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
|
||||
Ephemeral5mInputTokens: oaiUsage.ClaudeCacheCreation5mTokens,
|
||||
Ephemeral1hInputTokens: oaiUsage.ClaudeCacheCreation1hTokens,
|
||||
}
|
||||
}
|
||||
return usage
|
||||
}
|
||||
|
||||
func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
|
||||
if info.ClaudeConvertInfo.Done {
|
||||
return nil
|
||||
@@ -391,13 +410,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
|
||||
}
|
||||
if oaiUsage != nil {
|
||||
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
||||
Type: "message_delta",
|
||||
Usage: &dto.ClaudeUsage{
|
||||
InputTokens: oaiUsage.PromptTokens,
|
||||
OutputTokens: oaiUsage.CompletionTokens,
|
||||
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
||||
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
||||
},
|
||||
Type: "message_delta",
|
||||
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
|
||||
Delta: &dto.ClaudeMediaMessage{
|
||||
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
||||
},
|
||||
@@ -419,13 +433,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
|
||||
oaiUsage := info.ClaudeConvertInfo.Usage
|
||||
if oaiUsage != nil {
|
||||
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
||||
Type: "message_delta",
|
||||
Usage: &dto.ClaudeUsage{
|
||||
InputTokens: oaiUsage.PromptTokens,
|
||||
OutputTokens: oaiUsage.CompletionTokens,
|
||||
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
||||
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
||||
},
|
||||
Type: "message_delta",
|
||||
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
|
||||
Delta: &dto.ClaudeMediaMessage{
|
||||
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
||||
},
|
||||
@@ -555,13 +564,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
|
||||
}
|
||||
if oaiUsage != nil {
|
||||
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
||||
Type: "message_delta",
|
||||
Usage: &dto.ClaudeUsage{
|
||||
InputTokens: oaiUsage.PromptTokens,
|
||||
OutputTokens: oaiUsage.CompletionTokens,
|
||||
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
||||
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
||||
},
|
||||
Type: "message_delta",
|
||||
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
|
||||
Delta: &dto.ClaudeMediaMessage{
|
||||
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
||||
},
|
||||
|
||||
@@ -73,6 +73,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
|
||||
other["admin_info"] = adminInfo
|
||||
appendRequestPath(ctx, relayInfo, other)
|
||||
appendRequestConversionChain(relayInfo, other)
|
||||
appendFinalRequestFormat(relayInfo, other)
|
||||
appendBillingInfo(relayInfo, other)
|
||||
appendParamOverrideInfo(relayInfo, other)
|
||||
return other
|
||||
@@ -167,6 +168,17 @@ func appendRequestConversionChain(relayInfo *relaycommon.RelayInfo, other map[st
|
||||
other["request_conversion"] = chain
|
||||
}
|
||||
|
||||
func appendFinalRequestFormat(relayInfo *relaycommon.RelayInfo, other map[string]interface{}) {
|
||||
if relayInfo == nil || other == nil {
|
||||
return
|
||||
}
|
||||
if relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
|
||||
// claude indicates the final upstream request format is Claude Messages.
|
||||
// Frontend log rendering uses this to keep the original Claude input display.
|
||||
other["claude"] = true
|
||||
}
|
||||
}
|
||||
|
||||
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice, userGroupRatio float64) map[string]interface{} {
|
||||
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice, userGroupRatio)
|
||||
info["ws"] = true
|
||||
|
||||
102
service/quota.go
102
service/quota.go
@@ -235,108 +235,6 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
|
||||
})
|
||||
}
|
||||
|
||||
func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) {
|
||||
if usage != nil {
|
||||
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
|
||||
}
|
||||
|
||||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
||||
promptTokens := usage.PromptTokens
|
||||
completionTokens := usage.CompletionTokens
|
||||
modelName := relayInfo.OriginModelName
|
||||
|
||||
tokenName := ctx.GetString("token_name")
|
||||
completionRatio := relayInfo.PriceData.CompletionRatio
|
||||
modelRatio := relayInfo.PriceData.ModelRatio
|
||||
groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio
|
||||
modelPrice := relayInfo.PriceData.ModelPrice
|
||||
cacheRatio := relayInfo.PriceData.CacheRatio
|
||||
cacheTokens := usage.PromptTokensDetails.CachedTokens
|
||||
|
||||
cacheCreationRatio := relayInfo.PriceData.CacheCreationRatio
|
||||
cacheCreationRatio5m := relayInfo.PriceData.CacheCreation5mRatio
|
||||
cacheCreationRatio1h := relayInfo.PriceData.CacheCreation1hRatio
|
||||
cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
|
||||
cacheCreationTokens5m := usage.ClaudeCacheCreation5mTokens
|
||||
cacheCreationTokens1h := usage.ClaudeCacheCreation1hTokens
|
||||
|
||||
if relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
|
||||
promptTokens -= cacheTokens
|
||||
isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(modelName, relayInfo.PriceData.ModelRatio)
|
||||
if cacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
|
||||
maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData)
|
||||
if maybeCacheCreationTokens >= 0 && promptTokens >= maybeCacheCreationTokens {
|
||||
cacheCreationTokens = maybeCacheCreationTokens
|
||||
}
|
||||
}
|
||||
promptTokens -= cacheCreationTokens
|
||||
}
|
||||
|
||||
calculateQuota := 0.0
|
||||
if !relayInfo.PriceData.UsePrice {
|
||||
calculateQuota = float64(promptTokens)
|
||||
calculateQuota += float64(cacheTokens) * cacheRatio
|
||||
calculateQuota += float64(cacheCreationTokens5m) * cacheCreationRatio5m
|
||||
calculateQuota += float64(cacheCreationTokens1h) * cacheCreationRatio1h
|
||||
remainingCacheCreationTokens := cacheCreationTokens - cacheCreationTokens5m - cacheCreationTokens1h
|
||||
if remainingCacheCreationTokens > 0 {
|
||||
calculateQuota += float64(remainingCacheCreationTokens) * cacheCreationRatio
|
||||
}
|
||||
calculateQuota += float64(completionTokens) * completionRatio
|
||||
calculateQuota = calculateQuota * groupRatio * modelRatio
|
||||
} else {
|
||||
calculateQuota = modelPrice * common.QuotaPerUnit * groupRatio
|
||||
}
|
||||
|
||||
if modelRatio != 0 && calculateQuota <= 0 {
|
||||
calculateQuota = 1
|
||||
}
|
||||
|
||||
quota := int(calculateQuota)
|
||||
|
||||
totalTokens := promptTokens + completionTokens
|
||||
|
||||
var logContent string
|
||||
// record all the consume log even if quota is 0
|
||||
if totalTokens == 0 {
|
||||
// in this case, must be some error happened
|
||||
// we cannot just return, because we may have to return the pre-consumed quota
|
||||
quota = 0
|
||||
logContent += fmt.Sprintf("(可能是上游出错)")
|
||||
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
||||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
|
||||
} else {
|
||||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
||||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
||||
}
|
||||
|
||||
if err := SettleBilling(ctx, relayInfo, quota); err != nil {
|
||||
logger.LogError(ctx, "error settling billing: "+err.Error())
|
||||
}
|
||||
|
||||
other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio,
|
||||
cacheTokens, cacheRatio,
|
||||
cacheCreationTokens, cacheCreationRatio,
|
||||
cacheCreationTokens5m, cacheCreationRatio5m,
|
||||
cacheCreationTokens1h, cacheCreationRatio1h,
|
||||
modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
|
||||
model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
|
||||
ChannelId: relayInfo.ChannelId,
|
||||
PromptTokens: promptTokens,
|
||||
CompletionTokens: completionTokens,
|
||||
ModelName: modelName,
|
||||
TokenName: tokenName,
|
||||
Quota: quota,
|
||||
Content: logContent,
|
||||
TokenId: relayInfo.TokenId,
|
||||
UseTimeSeconds: int(useTimeSeconds),
|
||||
IsStream: relayInfo.IsStream,
|
||||
Group: relayInfo.UsingGroup,
|
||||
Other: other,
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData types.PriceData) int {
|
||||
if priceData.CacheCreationRatio == 1 {
|
||||
return 0
|
||||
|
||||
427
service/text_quota.go
Normal file
427
service/text_quota.go
Normal file
@@ -0,0 +1,427 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/QuantumNous/new-api/common"
|
||||
"github.com/QuantumNous/new-api/constant"
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
"github.com/QuantumNous/new-api/logger"
|
||||
"github.com/QuantumNous/new-api/model"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
"github.com/QuantumNous/new-api/setting/operation_setting"
|
||||
"github.com/QuantumNous/new-api/types"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/shopspring/decimal"
|
||||
)
|
||||
|
||||
type textQuotaSummary struct {
|
||||
PromptTokens int
|
||||
CompletionTokens int
|
||||
TotalTokens int
|
||||
CacheTokens int
|
||||
CacheCreationTokens int
|
||||
CacheCreationTokens5m int
|
||||
CacheCreationTokens1h int
|
||||
ImageTokens int
|
||||
AudioTokens int
|
||||
ModelName string
|
||||
TokenName string
|
||||
UseTimeSeconds int64
|
||||
CompletionRatio float64
|
||||
CacheRatio float64
|
||||
ImageRatio float64
|
||||
ModelRatio float64
|
||||
GroupRatio float64
|
||||
ModelPrice float64
|
||||
CacheCreationRatio float64
|
||||
CacheCreationRatio5m float64
|
||||
CacheCreationRatio1h float64
|
||||
Quota int
|
||||
IsClaudeUsageSemantic bool
|
||||
UsageSemantic string
|
||||
WebSearchPrice float64
|
||||
WebSearchCallCount int
|
||||
ClaudeWebSearchPrice float64
|
||||
ClaudeWebSearchCallCount int
|
||||
FileSearchPrice float64
|
||||
FileSearchCallCount int
|
||||
AudioInputPrice float64
|
||||
ImageGenerationCallPrice float64
|
||||
}
|
||||
|
||||
func cacheWriteTokensTotal(summary textQuotaSummary) int {
|
||||
if summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0 {
|
||||
splitCacheWriteTokens := summary.CacheCreationTokens5m + summary.CacheCreationTokens1h
|
||||
if summary.CacheCreationTokens > splitCacheWriteTokens {
|
||||
return summary.CacheCreationTokens
|
||||
}
|
||||
return splitCacheWriteTokens
|
||||
}
|
||||
return summary.CacheCreationTokens
|
||||
}
|
||||
|
||||
func isLegacyClaudeDerivedOpenAIUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) bool {
|
||||
if relayInfo == nil || usage == nil {
|
||||
return false
|
||||
}
|
||||
if relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
|
||||
return false
|
||||
}
|
||||
if usage.UsageSource != "" || usage.UsageSemantic != "" {
|
||||
return false
|
||||
}
|
||||
return usage.ClaudeCacheCreation5mTokens > 0 || usage.ClaudeCacheCreation1hTokens > 0
|
||||
}
|
||||
|
||||
func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) textQuotaSummary {
|
||||
summary := textQuotaSummary{
|
||||
ModelName: relayInfo.OriginModelName,
|
||||
TokenName: ctx.GetString("token_name"),
|
||||
UseTimeSeconds: time.Now().Unix() - relayInfo.StartTime.Unix(),
|
||||
CompletionRatio: relayInfo.PriceData.CompletionRatio,
|
||||
CacheRatio: relayInfo.PriceData.CacheRatio,
|
||||
ImageRatio: relayInfo.PriceData.ImageRatio,
|
||||
ModelRatio: relayInfo.PriceData.ModelRatio,
|
||||
GroupRatio: relayInfo.PriceData.GroupRatioInfo.GroupRatio,
|
||||
ModelPrice: relayInfo.PriceData.ModelPrice,
|
||||
CacheCreationRatio: relayInfo.PriceData.CacheCreationRatio,
|
||||
CacheCreationRatio5m: relayInfo.PriceData.CacheCreation5mRatio,
|
||||
CacheCreationRatio1h: relayInfo.PriceData.CacheCreation1hRatio,
|
||||
UsageSemantic: usageSemanticFromUsage(relayInfo, usage),
|
||||
}
|
||||
summary.IsClaudeUsageSemantic = summary.UsageSemantic == "anthropic"
|
||||
|
||||
if usage == nil {
|
||||
usage = &dto.Usage{
|
||||
PromptTokens: relayInfo.GetEstimatePromptTokens(),
|
||||
CompletionTokens: 0,
|
||||
TotalTokens: relayInfo.GetEstimatePromptTokens(),
|
||||
}
|
||||
}
|
||||
|
||||
summary.PromptTokens = usage.PromptTokens
|
||||
summary.CompletionTokens = usage.CompletionTokens
|
||||
summary.TotalTokens = usage.PromptTokens + usage.CompletionTokens
|
||||
summary.CacheTokens = usage.PromptTokensDetails.CachedTokens
|
||||
summary.CacheCreationTokens = usage.PromptTokensDetails.CachedCreationTokens
|
||||
summary.CacheCreationTokens5m = usage.ClaudeCacheCreation5mTokens
|
||||
summary.CacheCreationTokens1h = usage.ClaudeCacheCreation1hTokens
|
||||
summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
|
||||
summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
|
||||
legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
|
||||
|
||||
if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
|
||||
summary.PromptTokens -= summary.CacheTokens
|
||||
isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
|
||||
if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
|
||||
maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData)
|
||||
if maybeCacheCreationTokens >= 0 && summary.PromptTokens >= maybeCacheCreationTokens {
|
||||
summary.CacheCreationTokens = maybeCacheCreationTokens
|
||||
}
|
||||
}
|
||||
summary.PromptTokens -= summary.CacheCreationTokens
|
||||
}
|
||||
|
||||
dPromptTokens := decimal.NewFromInt(int64(summary.PromptTokens))
|
||||
dCacheTokens := decimal.NewFromInt(int64(summary.CacheTokens))
|
||||
dImageTokens := decimal.NewFromInt(int64(summary.ImageTokens))
|
||||
dAudioTokens := decimal.NewFromInt(int64(summary.AudioTokens))
|
||||
dCompletionTokens := decimal.NewFromInt(int64(summary.CompletionTokens))
|
||||
dCachedCreationTokens := decimal.NewFromInt(int64(summary.CacheCreationTokens))
|
||||
dCompletionRatio := decimal.NewFromFloat(summary.CompletionRatio)
|
||||
dCacheRatio := decimal.NewFromFloat(summary.CacheRatio)
|
||||
dImageRatio := decimal.NewFromFloat(summary.ImageRatio)
|
||||
dModelRatio := decimal.NewFromFloat(summary.ModelRatio)
|
||||
dGroupRatio := decimal.NewFromFloat(summary.GroupRatio)
|
||||
dModelPrice := decimal.NewFromFloat(summary.ModelPrice)
|
||||
dCacheCreationRatio := decimal.NewFromFloat(summary.CacheCreationRatio)
|
||||
dCacheCreationRatio5m := decimal.NewFromFloat(summary.CacheCreationRatio5m)
|
||||
dCacheCreationRatio1h := decimal.NewFromFloat(summary.CacheCreationRatio1h)
|
||||
dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
|
||||
|
||||
ratio := dModelRatio.Mul(dGroupRatio)
|
||||
|
||||
var dWebSearchQuota decimal.Decimal
|
||||
if relayInfo.ResponsesUsageInfo != nil {
|
||||
if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 {
|
||||
summary.WebSearchCallCount = webSearchTool.CallCount
|
||||
summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, webSearchTool.SearchContextSize)
|
||||
dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice).
|
||||
Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))).
|
||||
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||
}
|
||||
} else if strings.HasSuffix(summary.ModelName, "search-preview") {
|
||||
searchContextSize := ctx.GetString("chat_completion_web_search_context_size")
|
||||
if searchContextSize == "" {
|
||||
searchContextSize = "medium"
|
||||
}
|
||||
summary.WebSearchCallCount = 1
|
||||
summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, searchContextSize)
|
||||
dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice).
|
||||
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||
}
|
||||
|
||||
var dClaudeWebSearchQuota decimal.Decimal
|
||||
summary.ClaudeWebSearchCallCount = ctx.GetInt("claude_web_search_requests")
|
||||
if summary.ClaudeWebSearchCallCount > 0 {
|
||||
summary.ClaudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand()
|
||||
dClaudeWebSearchQuota = decimal.NewFromFloat(summary.ClaudeWebSearchPrice).
|
||||
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit).
|
||||
Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount)))
|
||||
}
|
||||
|
||||
var dFileSearchQuota decimal.Decimal
|
||||
if relayInfo.ResponsesUsageInfo != nil {
|
||||
if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 {
|
||||
summary.FileSearchCallCount = fileSearchTool.CallCount
|
||||
summary.FileSearchPrice = operation_setting.GetFileSearchPricePerThousand()
|
||||
dFileSearchQuota = decimal.NewFromFloat(summary.FileSearchPrice).
|
||||
Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
|
||||
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||
}
|
||||
}
|
||||
|
||||
var dImageGenerationCallQuota decimal.Decimal
|
||||
if ctx.GetBool("image_generation_call") {
|
||||
summary.ImageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size"))
|
||||
dImageGenerationCallQuota = decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||
}
|
||||
|
||||
var audioInputQuota decimal.Decimal
|
||||
if !relayInfo.PriceData.UsePrice {
|
||||
baseTokens := dPromptTokens
|
||||
|
||||
var cachedTokensWithRatio decimal.Decimal
|
||||
if !dCacheTokens.IsZero() {
|
||||
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
|
||||
baseTokens = baseTokens.Sub(dCacheTokens)
|
||||
}
|
||||
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
|
||||
}
|
||||
|
||||
var cachedCreationTokensWithRatio decimal.Decimal
|
||||
hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0
|
||||
if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens {
|
||||
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
|
||||
baseTokens = baseTokens.Sub(dCachedCreationTokens)
|
||||
cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio)
|
||||
} else {
|
||||
remaining := summary.CacheCreationTokens - summary.CacheCreationTokens5m - summary.CacheCreationTokens1h
|
||||
if remaining < 0 {
|
||||
remaining = 0
|
||||
}
|
||||
cachedCreationTokensWithRatio = decimal.NewFromInt(int64(remaining)).Mul(dCacheCreationRatio)
|
||||
cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens5m)).Mul(dCacheCreationRatio5m))
|
||||
cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens1h)).Mul(dCacheCreationRatio1h))
|
||||
}
|
||||
}
|
||||
|
||||
var imageTokensWithRatio decimal.Decimal
|
||||
if !dImageTokens.IsZero() {
|
||||
baseTokens = baseTokens.Sub(dImageTokens)
|
||||
imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
|
||||
}
|
||||
|
||||
if !dAudioTokens.IsZero() {
|
||||
summary.AudioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(summary.ModelName)
|
||||
if summary.AudioInputPrice > 0 {
|
||||
baseTokens = baseTokens.Sub(dAudioTokens)
|
||||
audioInputQuota = decimal.NewFromFloat(summary.AudioInputPrice).
|
||||
Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||
}
|
||||
}
|
||||
|
||||
promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio).Add(cachedCreationTokensWithRatio)
|
||||
completionQuota := dCompletionTokens.Mul(dCompletionRatio)
|
||||
quotaCalculateDecimal := promptQuota.Add(completionQuota).Mul(ratio)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
|
||||
|
||||
if len(relayInfo.PriceData.OtherRatios) > 0 {
|
||||
for _, otherRatio := range relayInfo.PriceData.OtherRatios {
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio))
|
||||
}
|
||||
}
|
||||
|
||||
if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) {
|
||||
quotaCalculateDecimal = decimal.NewFromInt(1)
|
||||
}
|
||||
summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart())
|
||||
} else {
|
||||
quotaCalculateDecimal := dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
|
||||
if len(relayInfo.PriceData.OtherRatios) > 0 {
|
||||
for _, otherRatio := range relayInfo.PriceData.OtherRatios {
|
||||
quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio))
|
||||
}
|
||||
}
|
||||
summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart())
|
||||
}
|
||||
|
||||
if summary.TotalTokens == 0 {
|
||||
summary.Quota = 0
|
||||
} else if !ratio.IsZero() && summary.Quota == 0 {
|
||||
summary.Quota = 1
|
||||
}
|
||||
|
||||
return summary
|
||||
}
|
||||
|
||||
func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) string {
|
||||
if usage != nil && usage.UsageSemantic != "" {
|
||||
return usage.UsageSemantic
|
||||
}
|
||||
if relayInfo != nil && relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
|
||||
return "anthropic"
|
||||
}
|
||||
return "openai"
|
||||
}
|
||||
|
||||
func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent []string) {
|
||||
originUsage := usage
|
||||
if usage == nil {
|
||||
extraContent = append(extraContent, "上游无计费信息")
|
||||
}
|
||||
if originUsage != nil {
|
||||
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
|
||||
}
|
||||
|
||||
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
|
||||
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||
|
||||
if summary.WebSearchCallCount > 0 {
|
||||
extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次,调用花费 %s", summary.WebSearchCallCount, decimal.NewFromFloat(summary.WebSearchPrice).Mul(decimal.NewFromInt(int64(summary.WebSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
|
||||
}
|
||||
if summary.ClaudeWebSearchCallCount > 0 {
|
||||
extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次,调用花费 %s", summary.ClaudeWebSearchCallCount, decimal.NewFromFloat(summary.ClaudeWebSearchPrice).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount))).String()))
|
||||
}
|
||||
if summary.FileSearchCallCount > 0 {
|
||||
extraContent = append(extraContent, fmt.Sprintf("File Search 调用 %d 次,调用花费 %s", summary.FileSearchCallCount, decimal.NewFromFloat(summary.FileSearchPrice).Mul(decimal.NewFromInt(int64(summary.FileSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
|
||||
}
|
||||
if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 {
|
||||
extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", decimal.NewFromFloat(summary.AudioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(decimal.NewFromInt(int64(summary.AudioTokens))).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
|
||||
}
|
||||
if summary.ImageGenerationCallPrice > 0 {
|
||||
extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
|
||||
}
|
||||
|
||||
if summary.TotalTokens == 0 {
|
||||
extraContent = append(extraContent, "上游没有返回计费信息,无法扣费(可能是上游超时)")
|
||||
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, summary.ModelName, relayInfo.FinalPreConsumedQuota))
|
||||
} else {
|
||||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, summary.Quota)
|
||||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, summary.Quota)
|
||||
}
|
||||
|
||||
if err := SettleBilling(ctx, relayInfo, summary.Quota); err != nil {
|
||||
logger.LogError(ctx, "error settling billing: "+err.Error())
|
||||
}
|
||||
|
||||
logModel := summary.ModelName
|
||||
if strings.HasPrefix(logModel, "gpt-4-gizmo") {
|
||||
logModel = "gpt-4-gizmo-*"
|
||||
extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName))
|
||||
}
|
||||
if strings.HasPrefix(logModel, "gpt-4o-gizmo") {
|
||||
logModel = "gpt-4o-gizmo-*"
|
||||
extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName))
|
||||
}
|
||||
|
||||
logContent := strings.Join(extraContent, ", ")
|
||||
var other map[string]interface{}
|
||||
if summary.IsClaudeUsageSemantic {
|
||||
other = GenerateClaudeOtherInfo(ctx, relayInfo,
|
||||
summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio,
|
||||
summary.CacheTokens, summary.CacheRatio,
|
||||
summary.CacheCreationTokens, summary.CacheCreationRatio,
|
||||
summary.CacheCreationTokens5m, summary.CacheCreationRatio5m,
|
||||
summary.CacheCreationTokens1h, summary.CacheCreationRatio1h,
|
||||
summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
|
||||
other["usage_semantic"] = "anthropic"
|
||||
} else {
|
||||
other = GenerateTextOtherInfo(ctx, relayInfo, summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio, summary.CacheTokens, summary.CacheRatio, summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
|
||||
}
|
||||
if adminRejectReason != "" {
|
||||
other["reject_reason"] = adminRejectReason
|
||||
}
|
||||
if summary.ImageTokens != 0 {
|
||||
other["image"] = true
|
||||
other["image_ratio"] = summary.ImageRatio
|
||||
other["image_output"] = summary.ImageTokens
|
||||
}
|
||||
if summary.WebSearchCallCount > 0 {
|
||||
other["web_search"] = true
|
||||
other["web_search_call_count"] = summary.WebSearchCallCount
|
||||
other["web_search_price"] = summary.WebSearchPrice
|
||||
} else if summary.ClaudeWebSearchCallCount > 0 {
|
||||
other["web_search"] = true
|
||||
other["web_search_call_count"] = summary.ClaudeWebSearchCallCount
|
||||
other["web_search_price"] = summary.ClaudeWebSearchPrice
|
||||
}
|
||||
if summary.FileSearchCallCount > 0 {
|
||||
other["file_search"] = true
|
||||
other["file_search_call_count"] = summary.FileSearchCallCount
|
||||
other["file_search_price"] = summary.FileSearchPrice
|
||||
}
|
||||
if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 {
|
||||
other["audio_input_seperate_price"] = true
|
||||
other["audio_input_token_count"] = summary.AudioTokens
|
||||
other["audio_input_price"] = summary.AudioInputPrice
|
||||
}
|
||||
if summary.ImageGenerationCallPrice > 0 {
|
||||
other["image_generation_call"] = true
|
||||
other["image_generation_call_price"] = summary.ImageGenerationCallPrice
|
||||
}
|
||||
if summary.CacheCreationTokens > 0 {
|
||||
other["cache_creation_tokens"] = summary.CacheCreationTokens
|
||||
other["cache_creation_ratio"] = summary.CacheCreationRatio
|
||||
}
|
||||
if summary.CacheCreationTokens5m > 0 {
|
||||
other["cache_creation_tokens_5m"] = summary.CacheCreationTokens5m
|
||||
other["cache_creation_ratio_5m"] = summary.CacheCreationRatio5m
|
||||
}
|
||||
if summary.CacheCreationTokens1h > 0 {
|
||||
other["cache_creation_tokens_1h"] = summary.CacheCreationTokens1h
|
||||
other["cache_creation_ratio_1h"] = summary.CacheCreationRatio1h
|
||||
}
|
||||
cacheWriteTokens := cacheWriteTokensTotal(summary)
|
||||
if cacheWriteTokens > 0 {
|
||||
// cache_write_tokens: normalized cache creation total for UI display.
|
||||
// If split 5m/1h values are present, this is their sum; otherwise it falls back
|
||||
// to cache_creation_tokens.
|
||||
other["cache_write_tokens"] = cacheWriteTokens
|
||||
}
|
||||
if relayInfo.GetFinalRequestRelayFormat() != types.RelayFormatClaude && usage != nil && usage.UsageSource != "" && usage.InputTokens > 0 {
|
||||
// input_tokens_total: explicit normalized total input used by the usage log UI.
|
||||
// Only write this field when upstream/current conversion has already provided a
|
||||
// reliable total input value and tagged the usage source. Do not infer it from
|
||||
// prompt/cache fields here, otherwise old upstream payloads may be double-counted.
|
||||
other["input_tokens_total"] = usage.InputTokens
|
||||
}
|
||||
|
||||
model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
|
||||
ChannelId: relayInfo.ChannelId,
|
||||
PromptTokens: summary.PromptTokens,
|
||||
CompletionTokens: summary.CompletionTokens,
|
||||
ModelName: logModel,
|
||||
TokenName: summary.TokenName,
|
||||
Quota: summary.Quota,
|
||||
Content: logContent,
|
||||
TokenId: relayInfo.TokenId,
|
||||
UseTimeSeconds: int(summary.UseTimeSeconds),
|
||||
IsStream: relayInfo.IsStream,
|
||||
Group: relayInfo.UsingGroup,
|
||||
Other: other,
|
||||
})
|
||||
}
|
||||
206
service/text_quota_test.go
Normal file
206
service/text_quota_test.go
Normal file
@@ -0,0 +1,206 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||
"github.com/QuantumNous/new-api/types"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(w)
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 1000,
|
||||
CompletionTokens: 200,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 100,
|
||||
CachedCreationTokens: 50,
|
||||
},
|
||||
ClaudeCacheCreation5mTokens: 10,
|
||||
ClaudeCacheCreation1hTokens: 20,
|
||||
}
|
||||
|
||||
priceData := types.PriceData{
|
||||
ModelRatio: 1,
|
||||
CompletionRatio: 2,
|
||||
CacheRatio: 0.1,
|
||||
CacheCreationRatio: 1.25,
|
||||
CacheCreation5mRatio: 1.25,
|
||||
CacheCreation1hRatio: 2,
|
||||
GroupRatioInfo: types.GroupRatioInfo{
|
||||
GroupRatio: 1,
|
||||
},
|
||||
}
|
||||
|
||||
chatRelayInfo := &relaycommon.RelayInfo{
|
||||
RelayFormat: types.RelayFormatOpenAI,
|
||||
FinalRequestRelayFormat: types.RelayFormatClaude,
|
||||
OriginModelName: "claude-3-7-sonnet",
|
||||
PriceData: priceData,
|
||||
StartTime: time.Now(),
|
||||
}
|
||||
messageRelayInfo := &relaycommon.RelayInfo{
|
||||
RelayFormat: types.RelayFormatClaude,
|
||||
FinalRequestRelayFormat: types.RelayFormatClaude,
|
||||
OriginModelName: "claude-3-7-sonnet",
|
||||
PriceData: priceData,
|
||||
StartTime: time.Now(),
|
||||
}
|
||||
|
||||
chatSummary := calculateTextQuotaSummary(ctx, chatRelayInfo, usage)
|
||||
messageSummary := calculateTextQuotaSummary(ctx, messageRelayInfo, usage)
|
||||
|
||||
require.Equal(t, messageSummary.Quota, chatSummary.Quota)
|
||||
require.Equal(t, messageSummary.CacheCreationTokens5m, chatSummary.CacheCreationTokens5m)
|
||||
require.Equal(t, messageSummary.CacheCreationTokens1h, chatSummary.CacheCreationTokens1h)
|
||||
require.True(t, chatSummary.IsClaudeUsageSemantic)
|
||||
require.Equal(t, 1488, chatSummary.Quota)
|
||||
}
|
||||
|
||||
func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(w)
|
||||
|
||||
relayInfo := &relaycommon.RelayInfo{
|
||||
RelayFormat: types.RelayFormatOpenAI,
|
||||
FinalRequestRelayFormat: types.RelayFormatClaude,
|
||||
OriginModelName: "claude-3-7-sonnet",
|
||||
PriceData: types.PriceData{
|
||||
ModelRatio: 1,
|
||||
CompletionRatio: 1,
|
||||
CacheRatio: 0,
|
||||
CacheCreationRatio: 1,
|
||||
CacheCreation5mRatio: 2,
|
||||
CacheCreation1hRatio: 3,
|
||||
GroupRatioInfo: types.GroupRatioInfo{
|
||||
GroupRatio: 1,
|
||||
},
|
||||
},
|
||||
StartTime: time.Now(),
|
||||
}
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 100,
|
||||
CompletionTokens: 0,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedCreationTokens: 10,
|
||||
},
|
||||
ClaudeCacheCreation5mTokens: 2,
|
||||
ClaudeCacheCreation1hTokens: 3,
|
||||
}
|
||||
|
||||
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||
|
||||
// 100 + remaining(5)*1 + 2*2 + 3*3 = 118
|
||||
require.Equal(t, 118, summary.Quota)
|
||||
}
|
||||
|
||||
func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(w)
|
||||
|
||||
relayInfo := &relaycommon.RelayInfo{
|
||||
RelayFormat: types.RelayFormatOpenAI,
|
||||
OriginModelName: "claude-3-7-sonnet",
|
||||
PriceData: types.PriceData{
|
||||
ModelRatio: 1,
|
||||
CompletionRatio: 2,
|
||||
CacheRatio: 0.1,
|
||||
CacheCreationRatio: 1.25,
|
||||
CacheCreation5mRatio: 1.25,
|
||||
CacheCreation1hRatio: 2,
|
||||
GroupRatioInfo: types.GroupRatioInfo{
|
||||
GroupRatio: 1,
|
||||
},
|
||||
},
|
||||
StartTime: time.Now(),
|
||||
}
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 1000,
|
||||
CompletionTokens: 200,
|
||||
UsageSemantic: "anthropic",
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 100,
|
||||
CachedCreationTokens: 50,
|
||||
},
|
||||
ClaudeCacheCreation5mTokens: 10,
|
||||
ClaudeCacheCreation1hTokens: 20,
|
||||
}
|
||||
|
||||
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||
|
||||
require.True(t, summary.IsClaudeUsageSemantic)
|
||||
require.Equal(t, "anthropic", summary.UsageSemantic)
|
||||
require.Equal(t, 1488, summary.Quota)
|
||||
}
|
||||
|
||||
func TestCacheWriteTokensTotal(t *testing.T) {
|
||||
t.Run("split cache creation", func(t *testing.T) {
|
||||
summary := textQuotaSummary{
|
||||
CacheCreationTokens: 50,
|
||||
CacheCreationTokens5m: 10,
|
||||
CacheCreationTokens1h: 20,
|
||||
}
|
||||
require.Equal(t, 50, cacheWriteTokensTotal(summary))
|
||||
})
|
||||
|
||||
t.Run("legacy cache creation", func(t *testing.T) {
|
||||
summary := textQuotaSummary{CacheCreationTokens: 50}
|
||||
require.Equal(t, 50, cacheWriteTokensTotal(summary))
|
||||
})
|
||||
|
||||
t.Run("split cache creation without aggregate remainder", func(t *testing.T) {
|
||||
summary := textQuotaSummary{
|
||||
CacheCreationTokens5m: 10,
|
||||
CacheCreationTokens1h: 20,
|
||||
}
|
||||
require.Equal(t, 30, cacheWriteTokensTotal(summary))
|
||||
})
|
||||
}
|
||||
|
||||
func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) {
|
||||
gin.SetMode(gin.TestMode)
|
||||
w := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(w)
|
||||
|
||||
relayInfo := &relaycommon.RelayInfo{
|
||||
RelayFormat: types.RelayFormatOpenAI,
|
||||
OriginModelName: "claude-3-7-sonnet",
|
||||
PriceData: types.PriceData{
|
||||
ModelRatio: 1,
|
||||
CompletionRatio: 5,
|
||||
CacheRatio: 0.1,
|
||||
CacheCreationRatio: 1.25,
|
||||
CacheCreation5mRatio: 1.25,
|
||||
CacheCreation1hRatio: 2,
|
||||
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
|
||||
},
|
||||
StartTime: time.Now(),
|
||||
}
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 62,
|
||||
CompletionTokens: 95,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 3544,
|
||||
},
|
||||
ClaudeCacheCreation5mTokens: 586,
|
||||
}
|
||||
|
||||
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||
|
||||
// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
|
||||
require.Equal(t, 1624, summary.Quota)
|
||||
}
|
||||
Reference in New Issue
Block a user