Merge pull request #3400 from seefs001/fix/openai-usage
refactor: optimize billing flow for OpenAI-to-Anthropic convert
This commit is contained in:
@@ -220,10 +220,12 @@ type CompletionsStreamResponse struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type Usage struct {
|
type Usage struct {
|
||||||
PromptTokens int `json:"prompt_tokens"`
|
PromptTokens int `json:"prompt_tokens"`
|
||||||
CompletionTokens int `json:"completion_tokens"`
|
CompletionTokens int `json:"completion_tokens"`
|
||||||
TotalTokens int `json:"total_tokens"`
|
TotalTokens int `json:"total_tokens"`
|
||||||
PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
|
PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
|
||||||
|
UsageSemantic string `json:"usage_semantic,omitempty"`
|
||||||
|
UsageSource string `json:"usage_source,omitempty"`
|
||||||
|
|
||||||
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
|
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
|
||||||
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
|
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
|
||||||
@@ -251,7 +253,7 @@ type OpenAIVideoResponse struct {
|
|||||||
|
|
||||||
type InputTokenDetails struct {
|
type InputTokenDetails struct {
|
||||||
CachedTokens int `json:"cached_tokens"`
|
CachedTokens int `json:"cached_tokens"`
|
||||||
CachedCreationTokens int `json:"-"`
|
CachedCreationTokens int `json:"cached_creation_tokens,omitempty"`
|
||||||
TextTokens int `json:"text_tokens"`
|
TextTokens int `json:"text_tokens"`
|
||||||
AudioTokens int `json:"audio_tokens"`
|
AudioTokens int `json:"audio_tokens"`
|
||||||
ImageTokens int `json:"image_tokens"`
|
ImageTokens int `json:"image_tokens"`
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ func AudioHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
|
|||||||
if usage.(*dto.Usage).CompletionTokenDetails.AudioTokens > 0 || usage.(*dto.Usage).PromptTokensDetails.AudioTokens > 0 {
|
if usage.(*dto.Usage).CompletionTokenDetails.AudioTokens > 0 || usage.(*dto.Usage).PromptTokensDetails.AudioTokens > 0 {
|
||||||
service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
|
service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
|
||||||
} else {
|
} else {
|
||||||
postConsumeQuota(c, info, usage.(*dto.Usage))
|
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|||||||
@@ -555,6 +555,35 @@ type ClaudeResponseInfo struct {
|
|||||||
Done bool
|
Done bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func cacheCreationTokensForOpenAIUsage(usage *dto.Usage) int {
|
||||||
|
if usage == nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
splitCacheCreationTokens := usage.ClaudeCacheCreation5mTokens + usage.ClaudeCacheCreation1hTokens
|
||||||
|
if splitCacheCreationTokens == 0 {
|
||||||
|
return usage.PromptTokensDetails.CachedCreationTokens
|
||||||
|
}
|
||||||
|
if usage.PromptTokensDetails.CachedCreationTokens > splitCacheCreationTokens {
|
||||||
|
return usage.PromptTokensDetails.CachedCreationTokens
|
||||||
|
}
|
||||||
|
return splitCacheCreationTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildOpenAIStyleUsageFromClaudeUsage(usage *dto.Usage) dto.Usage {
|
||||||
|
if usage == nil {
|
||||||
|
return dto.Usage{}
|
||||||
|
}
|
||||||
|
clone := *usage
|
||||||
|
cacheCreationTokens := cacheCreationTokensForOpenAIUsage(usage)
|
||||||
|
totalInputTokens := usage.PromptTokens + usage.PromptTokensDetails.CachedTokens + cacheCreationTokens
|
||||||
|
clone.PromptTokens = totalInputTokens
|
||||||
|
clone.InputTokens = totalInputTokens
|
||||||
|
clone.TotalTokens = totalInputTokens + usage.CompletionTokens
|
||||||
|
clone.UsageSemantic = "openai"
|
||||||
|
clone.UsageSource = "anthropic"
|
||||||
|
return clone
|
||||||
|
}
|
||||||
|
|
||||||
func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo *ClaudeResponseInfo) *dto.ClaudeUsage {
|
func buildMessageDeltaPatchUsage(claudeResponse *dto.ClaudeResponse, claudeInfo *ClaudeResponseInfo) *dto.ClaudeUsage {
|
||||||
usage := &dto.ClaudeUsage{}
|
usage := &dto.ClaudeUsage{}
|
||||||
if claudeResponse != nil && claudeResponse.Usage != nil {
|
if claudeResponse != nil && claudeResponse.Usage != nil {
|
||||||
@@ -643,6 +672,7 @@ func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *d
|
|||||||
// message_start, 获取usage
|
// message_start, 获取usage
|
||||||
if claudeResponse.Message != nil && claudeResponse.Message.Usage != nil {
|
if claudeResponse.Message != nil && claudeResponse.Message.Usage != nil {
|
||||||
claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
|
claudeInfo.Usage.PromptTokens = claudeResponse.Message.Usage.InputTokens
|
||||||
|
claudeInfo.Usage.UsageSemantic = "anthropic"
|
||||||
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
|
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Message.Usage.CacheReadInputTokens
|
||||||
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
|
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Message.Usage.CacheCreationInputTokens
|
||||||
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens()
|
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Message.Usage.GetCacheCreation5mTokens()
|
||||||
@@ -661,6 +691,7 @@ func FormatClaudeResponseInfo(claudeResponse *dto.ClaudeResponse, oaiResponse *d
|
|||||||
} else if claudeResponse.Type == "message_delta" {
|
} else if claudeResponse.Type == "message_delta" {
|
||||||
// 最终的usage获取
|
// 最终的usage获取
|
||||||
if claudeResponse.Usage != nil {
|
if claudeResponse.Usage != nil {
|
||||||
|
claudeInfo.Usage.UsageSemantic = "anthropic"
|
||||||
if claudeResponse.Usage.InputTokens > 0 {
|
if claudeResponse.Usage.InputTokens > 0 {
|
||||||
// 不叠加,只取最新的
|
// 不叠加,只取最新的
|
||||||
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
|
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
|
||||||
@@ -754,12 +785,16 @@ func HandleStreamFinalResponse(c *gin.Context, info *relaycommon.RelayInfo, clau
|
|||||||
}
|
}
|
||||||
claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
|
claudeInfo.Usage = service.ResponseText2Usage(c, claudeInfo.ResponseText.String(), info.UpstreamModelName, claudeInfo.Usage.PromptTokens)
|
||||||
}
|
}
|
||||||
|
if claudeInfo.Usage != nil {
|
||||||
|
claudeInfo.Usage.UsageSemantic = "anthropic"
|
||||||
|
}
|
||||||
|
|
||||||
if info.RelayFormat == types.RelayFormatClaude {
|
if info.RelayFormat == types.RelayFormatClaude {
|
||||||
//
|
//
|
||||||
} else if info.RelayFormat == types.RelayFormatOpenAI {
|
} else if info.RelayFormat == types.RelayFormatOpenAI {
|
||||||
if info.ShouldIncludeUsage {
|
if info.ShouldIncludeUsage {
|
||||||
response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, claudeInfo.Created, info.UpstreamModelName, *claudeInfo.Usage)
|
openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(claudeInfo.Usage)
|
||||||
|
response := helper.GenerateFinalUsageResponse(claudeInfo.ResponseId, claudeInfo.Created, info.UpstreamModelName, openAIUsage)
|
||||||
err := helper.ObjectData(c, response)
|
err := helper.ObjectData(c, response)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
common.SysLog("send final response failed: " + err.Error())
|
common.SysLog("send final response failed: " + err.Error())
|
||||||
@@ -810,6 +845,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
|
|||||||
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
|
claudeInfo.Usage.PromptTokens = claudeResponse.Usage.InputTokens
|
||||||
claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
|
claudeInfo.Usage.CompletionTokens = claudeResponse.Usage.OutputTokens
|
||||||
claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
|
claudeInfo.Usage.TotalTokens = claudeResponse.Usage.InputTokens + claudeResponse.Usage.OutputTokens
|
||||||
|
claudeInfo.Usage.UsageSemantic = "anthropic"
|
||||||
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
|
claudeInfo.Usage.PromptTokensDetails.CachedTokens = claudeResponse.Usage.CacheReadInputTokens
|
||||||
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
|
claudeInfo.Usage.PromptTokensDetails.CachedCreationTokens = claudeResponse.Usage.CacheCreationInputTokens
|
||||||
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens()
|
claudeInfo.Usage.ClaudeCacheCreation5mTokens = claudeResponse.Usage.GetCacheCreation5mTokens()
|
||||||
@@ -819,7 +855,7 @@ func HandleClaudeResponseData(c *gin.Context, info *relaycommon.RelayInfo, claud
|
|||||||
switch info.RelayFormat {
|
switch info.RelayFormat {
|
||||||
case types.RelayFormatOpenAI:
|
case types.RelayFormatOpenAI:
|
||||||
openaiResponse := ResponseClaude2OpenAI(&claudeResponse)
|
openaiResponse := ResponseClaude2OpenAI(&claudeResponse)
|
||||||
openaiResponse.Usage = *claudeInfo.Usage
|
openaiResponse.Usage = buildOpenAIStyleUsageFromClaudeUsage(claudeInfo.Usage)
|
||||||
responseData, err = json.Marshal(openaiResponse)
|
responseData, err = json.Marshal(openaiResponse)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return types.NewError(err, types.ErrorCodeBadResponseBody)
|
return types.NewError(err, types.ErrorCodeBadResponseBody)
|
||||||
|
|||||||
@@ -173,3 +173,85 @@ func TestFormatClaudeResponseInfo_ContentBlockDelta(t *testing.T) {
|
|||||||
t.Errorf("ResponseText = %q, want %q", claudeInfo.ResponseText.String(), "hello")
|
t.Errorf("ResponseText = %q, want %q", claudeInfo.ResponseText.String(), "hello")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildOpenAIStyleUsageFromClaudeUsage(t *testing.T) {
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 100,
|
||||||
|
CompletionTokens: 20,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 30,
|
||||||
|
CachedCreationTokens: 50,
|
||||||
|
},
|
||||||
|
ClaudeCacheCreation5mTokens: 10,
|
||||||
|
ClaudeCacheCreation1hTokens: 20,
|
||||||
|
UsageSemantic: "anthropic",
|
||||||
|
}
|
||||||
|
|
||||||
|
openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(usage)
|
||||||
|
|
||||||
|
if openAIUsage.PromptTokens != 180 {
|
||||||
|
t.Fatalf("PromptTokens = %d, want 180", openAIUsage.PromptTokens)
|
||||||
|
}
|
||||||
|
if openAIUsage.InputTokens != 180 {
|
||||||
|
t.Fatalf("InputTokens = %d, want 180", openAIUsage.InputTokens)
|
||||||
|
}
|
||||||
|
if openAIUsage.TotalTokens != 200 {
|
||||||
|
t.Fatalf("TotalTokens = %d, want 200", openAIUsage.TotalTokens)
|
||||||
|
}
|
||||||
|
if openAIUsage.UsageSemantic != "openai" {
|
||||||
|
t.Fatalf("UsageSemantic = %s, want openai", openAIUsage.UsageSemantic)
|
||||||
|
}
|
||||||
|
if openAIUsage.UsageSource != "anthropic" {
|
||||||
|
t.Fatalf("UsageSource = %s, want anthropic", openAIUsage.UsageSource)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuildOpenAIStyleUsageFromClaudeUsagePreservesCacheCreationRemainder(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
cachedCreationTokens int
|
||||||
|
cacheCreationTokens5m int
|
||||||
|
cacheCreationTokens1h int
|
||||||
|
expectedTotalInputToken int
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
name: "prefers aggregate when it includes remainder",
|
||||||
|
cachedCreationTokens: 50,
|
||||||
|
cacheCreationTokens5m: 10,
|
||||||
|
cacheCreationTokens1h: 20,
|
||||||
|
expectedTotalInputToken: 180,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
name: "falls back to split tokens when aggregate missing",
|
||||||
|
cachedCreationTokens: 0,
|
||||||
|
cacheCreationTokens5m: 10,
|
||||||
|
cacheCreationTokens1h: 20,
|
||||||
|
expectedTotalInputToken: 160,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 100,
|
||||||
|
CompletionTokens: 20,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 30,
|
||||||
|
CachedCreationTokens: tt.cachedCreationTokens,
|
||||||
|
},
|
||||||
|
ClaudeCacheCreation5mTokens: tt.cacheCreationTokens5m,
|
||||||
|
ClaudeCacheCreation1hTokens: tt.cacheCreationTokens1h,
|
||||||
|
UsageSemantic: "anthropic",
|
||||||
|
}
|
||||||
|
|
||||||
|
openAIUsage := buildOpenAIStyleUsageFromClaudeUsage(usage)
|
||||||
|
|
||||||
|
if openAIUsage.PromptTokens != tt.expectedTotalInputToken {
|
||||||
|
t.Fatalf("PromptTokens = %d, want %d", openAIUsage.PromptTokens, tt.expectedTotalInputToken)
|
||||||
|
}
|
||||||
|
if openAIUsage.InputTokens != tt.expectedTotalInputToken {
|
||||||
|
t.Fatalf("InputTokens = %d, want %d", openAIUsage.InputTokens, tt.expectedTotalInputToken)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -122,7 +122,7 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
|
|||||||
return newApiErr
|
return newApiErr
|
||||||
}
|
}
|
||||||
|
|
||||||
service.PostClaudeConsumeQuota(c, info, usage)
|
service.PostTextConsumeQuota(c, info, usage, nil)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -190,6 +190,6 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
|
|||||||
return newAPIError
|
return newAPIError
|
||||||
}
|
}
|
||||||
|
|
||||||
service.PostClaudeConsumeQuota(c, info, usage.(*dto.Usage))
|
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,25 +6,20 @@ import (
|
|||||||
"io"
|
"io"
|
||||||
"net/http"
|
"net/http"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/QuantumNous/new-api/common"
|
"github.com/QuantumNous/new-api/common"
|
||||||
"github.com/QuantumNous/new-api/constant"
|
"github.com/QuantumNous/new-api/constant"
|
||||||
"github.com/QuantumNous/new-api/dto"
|
"github.com/QuantumNous/new-api/dto"
|
||||||
"github.com/QuantumNous/new-api/logger"
|
"github.com/QuantumNous/new-api/logger"
|
||||||
"github.com/QuantumNous/new-api/model"
|
|
||||||
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||||
relayconstant "github.com/QuantumNous/new-api/relay/constant"
|
relayconstant "github.com/QuantumNous/new-api/relay/constant"
|
||||||
"github.com/QuantumNous/new-api/relay/helper"
|
"github.com/QuantumNous/new-api/relay/helper"
|
||||||
"github.com/QuantumNous/new-api/service"
|
"github.com/QuantumNous/new-api/service"
|
||||||
"github.com/QuantumNous/new-api/setting/model_setting"
|
"github.com/QuantumNous/new-api/setting/model_setting"
|
||||||
"github.com/QuantumNous/new-api/setting/operation_setting"
|
|
||||||
"github.com/QuantumNous/new-api/setting/ratio_setting"
|
"github.com/QuantumNous/new-api/setting/ratio_setting"
|
||||||
"github.com/QuantumNous/new-api/types"
|
"github.com/QuantumNous/new-api/types"
|
||||||
"github.com/samber/lo"
|
"github.com/samber/lo"
|
||||||
|
|
||||||
"github.com/shopspring/decimal"
|
|
||||||
|
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -93,7 +88,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
|
|||||||
if containAudioTokens && containsAudioRatios {
|
if containAudioTokens && containsAudioRatios {
|
||||||
service.PostAudioConsumeQuota(c, info, usage, "")
|
service.PostAudioConsumeQuota(c, info, usage, "")
|
||||||
} else {
|
} else {
|
||||||
postConsumeQuota(c, info, usage)
|
service.PostTextConsumeQuota(c, info, usage, nil)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -216,293 +211,7 @@ func TextHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *types
|
|||||||
if containAudioTokens && containsAudioRatios {
|
if containAudioTokens && containsAudioRatios {
|
||||||
service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
|
service.PostAudioConsumeQuota(c, info, usage.(*dto.Usage), "")
|
||||||
} else {
|
} else {
|
||||||
postConsumeQuota(c, info, usage.(*dto.Usage))
|
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent ...string) {
|
|
||||||
originUsage := usage
|
|
||||||
if usage == nil {
|
|
||||||
usage = &dto.Usage{
|
|
||||||
PromptTokens: relayInfo.GetEstimatePromptTokens(),
|
|
||||||
CompletionTokens: 0,
|
|
||||||
TotalTokens: relayInfo.GetEstimatePromptTokens(),
|
|
||||||
}
|
|
||||||
extraContent = append(extraContent, "上游无计费信息")
|
|
||||||
}
|
|
||||||
|
|
||||||
if originUsage != nil {
|
|
||||||
service.ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
|
|
||||||
}
|
|
||||||
|
|
||||||
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
|
|
||||||
|
|
||||||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
|
||||||
promptTokens := usage.PromptTokens
|
|
||||||
cacheTokens := usage.PromptTokensDetails.CachedTokens
|
|
||||||
imageTokens := usage.PromptTokensDetails.ImageTokens
|
|
||||||
audioTokens := usage.PromptTokensDetails.AudioTokens
|
|
||||||
completionTokens := usage.CompletionTokens
|
|
||||||
cachedCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
|
|
||||||
|
|
||||||
modelName := relayInfo.OriginModelName
|
|
||||||
|
|
||||||
tokenName := ctx.GetString("token_name")
|
|
||||||
completionRatio := relayInfo.PriceData.CompletionRatio
|
|
||||||
cacheRatio := relayInfo.PriceData.CacheRatio
|
|
||||||
imageRatio := relayInfo.PriceData.ImageRatio
|
|
||||||
modelRatio := relayInfo.PriceData.ModelRatio
|
|
||||||
groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio
|
|
||||||
modelPrice := relayInfo.PriceData.ModelPrice
|
|
||||||
cachedCreationRatio := relayInfo.PriceData.CacheCreationRatio
|
|
||||||
|
|
||||||
// Convert values to decimal for precise calculation
|
|
||||||
dPromptTokens := decimal.NewFromInt(int64(promptTokens))
|
|
||||||
dCacheTokens := decimal.NewFromInt(int64(cacheTokens))
|
|
||||||
dImageTokens := decimal.NewFromInt(int64(imageTokens))
|
|
||||||
dAudioTokens := decimal.NewFromInt(int64(audioTokens))
|
|
||||||
dCompletionTokens := decimal.NewFromInt(int64(completionTokens))
|
|
||||||
dCachedCreationTokens := decimal.NewFromInt(int64(cachedCreationTokens))
|
|
||||||
dCompletionRatio := decimal.NewFromFloat(completionRatio)
|
|
||||||
dCacheRatio := decimal.NewFromFloat(cacheRatio)
|
|
||||||
dImageRatio := decimal.NewFromFloat(imageRatio)
|
|
||||||
dModelRatio := decimal.NewFromFloat(modelRatio)
|
|
||||||
dGroupRatio := decimal.NewFromFloat(groupRatio)
|
|
||||||
dModelPrice := decimal.NewFromFloat(modelPrice)
|
|
||||||
dCachedCreationRatio := decimal.NewFromFloat(cachedCreationRatio)
|
|
||||||
dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
|
|
||||||
|
|
||||||
ratio := dModelRatio.Mul(dGroupRatio)
|
|
||||||
|
|
||||||
// openai web search 工具计费
|
|
||||||
var dWebSearchQuota decimal.Decimal
|
|
||||||
var webSearchPrice float64
|
|
||||||
// response api 格式工具计费
|
|
||||||
if relayInfo.ResponsesUsageInfo != nil {
|
|
||||||
if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 {
|
|
||||||
// 计算 web search 调用的配额 (配额 = 价格 * 调用次数 / 1000 * 分组倍率)
|
|
||||||
webSearchPrice = operation_setting.GetWebSearchPricePerThousand(modelName, webSearchTool.SearchContextSize)
|
|
||||||
dWebSearchQuota = decimal.NewFromFloat(webSearchPrice).
|
|
||||||
Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))).
|
|
||||||
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次,上下文大小 %s,调用花费 %s",
|
|
||||||
webSearchTool.CallCount, webSearchTool.SearchContextSize, dWebSearchQuota.String()))
|
|
||||||
}
|
|
||||||
} else if strings.HasSuffix(modelName, "search-preview") {
|
|
||||||
// search-preview 模型不支持 response api
|
|
||||||
searchContextSize := ctx.GetString("chat_completion_web_search_context_size")
|
|
||||||
if searchContextSize == "" {
|
|
||||||
searchContextSize = "medium"
|
|
||||||
}
|
|
||||||
webSearchPrice = operation_setting.GetWebSearchPricePerThousand(modelName, searchContextSize)
|
|
||||||
dWebSearchQuota = decimal.NewFromFloat(webSearchPrice).
|
|
||||||
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 1 次,上下文大小 %s,调用花费 %s",
|
|
||||||
searchContextSize, dWebSearchQuota.String()))
|
|
||||||
}
|
|
||||||
// claude web search tool 计费
|
|
||||||
var dClaudeWebSearchQuota decimal.Decimal
|
|
||||||
var claudeWebSearchPrice float64
|
|
||||||
claudeWebSearchCallCount := ctx.GetInt("claude_web_search_requests")
|
|
||||||
if claudeWebSearchCallCount > 0 {
|
|
||||||
claudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand()
|
|
||||||
dClaudeWebSearchQuota = decimal.NewFromFloat(claudeWebSearchPrice).
|
|
||||||
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit).Mul(decimal.NewFromInt(int64(claudeWebSearchCallCount)))
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次,调用花费 %s",
|
|
||||||
claudeWebSearchCallCount, dClaudeWebSearchQuota.String()))
|
|
||||||
}
|
|
||||||
// file search tool 计费
|
|
||||||
var dFileSearchQuota decimal.Decimal
|
|
||||||
var fileSearchPrice float64
|
|
||||||
if relayInfo.ResponsesUsageInfo != nil {
|
|
||||||
if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 {
|
|
||||||
fileSearchPrice = operation_setting.GetFileSearchPricePerThousand()
|
|
||||||
dFileSearchQuota = decimal.NewFromFloat(fileSearchPrice).
|
|
||||||
Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
|
|
||||||
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("File Search 调用 %d 次,调用花费 %s",
|
|
||||||
fileSearchTool.CallCount, dFileSearchQuota.String()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
var dImageGenerationCallQuota decimal.Decimal
|
|
||||||
var imageGenerationCallPrice float64
|
|
||||||
if ctx.GetBool("image_generation_call") {
|
|
||||||
imageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size"))
|
|
||||||
dImageGenerationCallQuota = decimal.NewFromFloat(imageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", dImageGenerationCallQuota.String()))
|
|
||||||
}
|
|
||||||
|
|
||||||
var quotaCalculateDecimal decimal.Decimal
|
|
||||||
|
|
||||||
var audioInputQuota decimal.Decimal
|
|
||||||
var audioInputPrice float64
|
|
||||||
isClaudeUsageSemantic := relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude
|
|
||||||
if !relayInfo.PriceData.UsePrice {
|
|
||||||
baseTokens := dPromptTokens
|
|
||||||
// 减去 cached tokens
|
|
||||||
// Anthropic API 的 input_tokens 已经不包含缓存 tokens,不需要减去
|
|
||||||
// OpenAI/OpenRouter 等 API 的 prompt_tokens 包含缓存 tokens,需要减去
|
|
||||||
var cachedTokensWithRatio decimal.Decimal
|
|
||||||
if !dCacheTokens.IsZero() {
|
|
||||||
if !isClaudeUsageSemantic {
|
|
||||||
baseTokens = baseTokens.Sub(dCacheTokens)
|
|
||||||
}
|
|
||||||
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
|
|
||||||
}
|
|
||||||
var dCachedCreationTokensWithRatio decimal.Decimal
|
|
||||||
if !dCachedCreationTokens.IsZero() {
|
|
||||||
if !isClaudeUsageSemantic {
|
|
||||||
baseTokens = baseTokens.Sub(dCachedCreationTokens)
|
|
||||||
}
|
|
||||||
dCachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCachedCreationRatio)
|
|
||||||
}
|
|
||||||
|
|
||||||
// 减去 image tokens
|
|
||||||
var imageTokensWithRatio decimal.Decimal
|
|
||||||
if !dImageTokens.IsZero() {
|
|
||||||
baseTokens = baseTokens.Sub(dImageTokens)
|
|
||||||
imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
|
|
||||||
}
|
|
||||||
|
|
||||||
// 减去 Gemini audio tokens
|
|
||||||
if !dAudioTokens.IsZero() {
|
|
||||||
audioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(modelName)
|
|
||||||
if audioInputPrice > 0 {
|
|
||||||
// 重新计算 base tokens
|
|
||||||
baseTokens = baseTokens.Sub(dAudioTokens)
|
|
||||||
audioInputQuota = decimal.NewFromFloat(audioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", audioInputQuota.String()))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
promptQuota := baseTokens.Add(cachedTokensWithRatio).
|
|
||||||
Add(imageTokensWithRatio).
|
|
||||||
Add(dCachedCreationTokensWithRatio)
|
|
||||||
|
|
||||||
completionQuota := dCompletionTokens.Mul(dCompletionRatio)
|
|
||||||
|
|
||||||
quotaCalculateDecimal = promptQuota.Add(completionQuota).Mul(ratio)
|
|
||||||
|
|
||||||
if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) {
|
|
||||||
quotaCalculateDecimal = decimal.NewFromInt(1)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
quotaCalculateDecimal = dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio)
|
|
||||||
}
|
|
||||||
// 添加 responses tools call 调用的配额
|
|
||||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
|
|
||||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
|
|
||||||
// 添加 audio input 独立计费
|
|
||||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
|
|
||||||
// 添加 image generation call 计费
|
|
||||||
quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
|
|
||||||
|
|
||||||
if len(relayInfo.PriceData.OtherRatios) > 0 {
|
|
||||||
for key, otherRatio := range relayInfo.PriceData.OtherRatios {
|
|
||||||
dOtherRatio := decimal.NewFromFloat(otherRatio)
|
|
||||||
quotaCalculateDecimal = quotaCalculateDecimal.Mul(dOtherRatio)
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("其他倍率 %s: %f", key, otherRatio))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
quota := int(quotaCalculateDecimal.Round(0).IntPart())
|
|
||||||
totalTokens := promptTokens + completionTokens
|
|
||||||
|
|
||||||
//var logContent string
|
|
||||||
|
|
||||||
// record all the consume log even if quota is 0
|
|
||||||
if totalTokens == 0 {
|
|
||||||
// in this case, must be some error happened
|
|
||||||
// we cannot just return, because we may have to return the pre-consumed quota
|
|
||||||
quota = 0
|
|
||||||
extraContent = append(extraContent, "上游没有返回计费信息,无法扣费(可能是上游超时)")
|
|
||||||
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
|
||||||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
|
|
||||||
} else {
|
|
||||||
if !ratio.IsZero() && quota == 0 {
|
|
||||||
quota = 1
|
|
||||||
}
|
|
||||||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
|
||||||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := service.SettleBilling(ctx, relayInfo, quota); err != nil {
|
|
||||||
logger.LogError(ctx, "error settling billing: "+err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
logModel := modelName
|
|
||||||
if strings.HasPrefix(logModel, "gpt-4-gizmo") {
|
|
||||||
logModel = "gpt-4-gizmo-*"
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("模型 %s", modelName))
|
|
||||||
}
|
|
||||||
if strings.HasPrefix(logModel, "gpt-4o-gizmo") {
|
|
||||||
logModel = "gpt-4o-gizmo-*"
|
|
||||||
extraContent = append(extraContent, fmt.Sprintf("模型 %s", modelName))
|
|
||||||
}
|
|
||||||
logContent := strings.Join(extraContent, ", ")
|
|
||||||
other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
|
|
||||||
if adminRejectReason != "" {
|
|
||||||
other["reject_reason"] = adminRejectReason
|
|
||||||
}
|
|
||||||
// For chat-based calls to the Claude model, tagging is required. Using Claude's rendering logs, the two approaches handle input rendering differently.
|
|
||||||
if isClaudeUsageSemantic {
|
|
||||||
other["claude"] = true
|
|
||||||
other["usage_semantic"] = "anthropic"
|
|
||||||
}
|
|
||||||
if imageTokens != 0 {
|
|
||||||
other["image"] = true
|
|
||||||
other["image_ratio"] = imageRatio
|
|
||||||
other["image_output"] = imageTokens
|
|
||||||
}
|
|
||||||
if cachedCreationTokens != 0 {
|
|
||||||
other["cache_creation_tokens"] = cachedCreationTokens
|
|
||||||
other["cache_creation_ratio"] = cachedCreationRatio
|
|
||||||
}
|
|
||||||
if !dWebSearchQuota.IsZero() {
|
|
||||||
if relayInfo.ResponsesUsageInfo != nil {
|
|
||||||
if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists {
|
|
||||||
other["web_search"] = true
|
|
||||||
other["web_search_call_count"] = webSearchTool.CallCount
|
|
||||||
other["web_search_price"] = webSearchPrice
|
|
||||||
}
|
|
||||||
} else if strings.HasSuffix(modelName, "search-preview") {
|
|
||||||
other["web_search"] = true
|
|
||||||
other["web_search_call_count"] = 1
|
|
||||||
other["web_search_price"] = webSearchPrice
|
|
||||||
}
|
|
||||||
} else if !dClaudeWebSearchQuota.IsZero() {
|
|
||||||
other["web_search"] = true
|
|
||||||
other["web_search_call_count"] = claudeWebSearchCallCount
|
|
||||||
other["web_search_price"] = claudeWebSearchPrice
|
|
||||||
}
|
|
||||||
if !dFileSearchQuota.IsZero() && relayInfo.ResponsesUsageInfo != nil {
|
|
||||||
if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists {
|
|
||||||
other["file_search"] = true
|
|
||||||
other["file_search_call_count"] = fileSearchTool.CallCount
|
|
||||||
other["file_search_price"] = fileSearchPrice
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !audioInputQuota.IsZero() {
|
|
||||||
other["audio_input_seperate_price"] = true
|
|
||||||
other["audio_input_token_count"] = audioTokens
|
|
||||||
other["audio_input_price"] = audioInputPrice
|
|
||||||
}
|
|
||||||
if !dImageGenerationCallQuota.IsZero() {
|
|
||||||
other["image_generation_call"] = true
|
|
||||||
other["image_generation_call_price"] = imageGenerationCallPrice
|
|
||||||
}
|
|
||||||
model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
|
|
||||||
ChannelId: relayInfo.ChannelId,
|
|
||||||
PromptTokens: promptTokens,
|
|
||||||
CompletionTokens: completionTokens,
|
|
||||||
ModelName: logModel,
|
|
||||||
TokenName: tokenName,
|
|
||||||
Quota: quota,
|
|
||||||
Content: logContent,
|
|
||||||
TokenId: relayInfo.TokenId,
|
|
||||||
UseTimeSeconds: int(useTimeSeconds),
|
|
||||||
IsStream: relayInfo.IsStream,
|
|
||||||
Group: relayInfo.UsingGroup,
|
|
||||||
Other: other,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -82,6 +82,6 @@ func EmbeddingHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
|
|||||||
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
|
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
|
||||||
return newAPIError
|
return newAPIError
|
||||||
}
|
}
|
||||||
postConsumeQuota(c, info, usage.(*dto.Usage))
|
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -194,7 +194,7 @@ func GeminiHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
|
|||||||
return openaiErr
|
return openaiErr
|
||||||
}
|
}
|
||||||
|
|
||||||
postConsumeQuota(c, info, usage.(*dto.Usage))
|
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -288,6 +288,6 @@ func GeminiEmbeddingHandler(c *gin.Context, info *relaycommon.RelayInfo) (newAPI
|
|||||||
return openaiErr
|
return openaiErr
|
||||||
}
|
}
|
||||||
|
|
||||||
postConsumeQuota(c, info, usage.(*dto.Usage))
|
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -141,6 +141,6 @@ func ImageHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *type
|
|||||||
logContent = append(logContent, fmt.Sprintf("生成数量 %d", imageN))
|
logContent = append(logContent, fmt.Sprintf("生成数量 %d", imageN))
|
||||||
}
|
}
|
||||||
|
|
||||||
postConsumeQuota(c, info, usage.(*dto.Usage), logContent...)
|
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), logContent)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -96,6 +96,6 @@ func RerankHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
|
|||||||
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
|
service.ResetStatusCode(newAPIError, statusCodeMappingStr)
|
||||||
return newAPIError
|
return newAPIError
|
||||||
}
|
}
|
||||||
postConsumeQuota(c, info, usage.(*dto.Usage))
|
service.PostTextConsumeQuota(c, info, usage.(*dto.Usage), nil)
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -145,7 +145,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
|
|||||||
info.PriceData = originPriceData
|
info.PriceData = originPriceData
|
||||||
return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry())
|
return types.NewError(err, types.ErrorCodeModelPriceError, types.ErrOptionWithSkipRetry())
|
||||||
}
|
}
|
||||||
postConsumeQuota(c, info, usageDto)
|
service.PostTextConsumeQuota(c, info, usageDto, nil)
|
||||||
|
|
||||||
info.OriginModelName = originModelName
|
info.OriginModelName = originModelName
|
||||||
info.PriceData = originPriceData
|
info.PriceData = originPriceData
|
||||||
@@ -155,7 +155,7 @@ func ResponsesHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *
|
|||||||
if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
|
if strings.HasPrefix(info.OriginModelName, "gpt-4o-audio") {
|
||||||
service.PostAudioConsumeQuota(c, info, usageDto, "")
|
service.PostAudioConsumeQuota(c, info, usageDto, "")
|
||||||
} else {
|
} else {
|
||||||
postConsumeQuota(c, info, usageDto)
|
service.PostTextConsumeQuota(c, info, usageDto, nil)
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -223,6 +223,25 @@ func generateStopBlock(index int) *dto.ClaudeResponse {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func buildClaudeUsageFromOpenAIUsage(oaiUsage *dto.Usage) *dto.ClaudeUsage {
|
||||||
|
if oaiUsage == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
usage := &dto.ClaudeUsage{
|
||||||
|
InputTokens: oaiUsage.PromptTokens,
|
||||||
|
OutputTokens: oaiUsage.CompletionTokens,
|
||||||
|
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
||||||
|
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
||||||
|
}
|
||||||
|
if oaiUsage.ClaudeCacheCreation5mTokens > 0 || oaiUsage.ClaudeCacheCreation1hTokens > 0 {
|
||||||
|
usage.CacheCreation = &dto.ClaudeCacheCreationUsage{
|
||||||
|
Ephemeral5mInputTokens: oaiUsage.ClaudeCacheCreation5mTokens,
|
||||||
|
Ephemeral1hInputTokens: oaiUsage.ClaudeCacheCreation1hTokens,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return usage
|
||||||
|
}
|
||||||
|
|
||||||
func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
|
func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamResponse, info *relaycommon.RelayInfo) []*dto.ClaudeResponse {
|
||||||
if info.ClaudeConvertInfo.Done {
|
if info.ClaudeConvertInfo.Done {
|
||||||
return nil
|
return nil
|
||||||
@@ -391,13 +410,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
|
|||||||
}
|
}
|
||||||
if oaiUsage != nil {
|
if oaiUsage != nil {
|
||||||
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
||||||
Type: "message_delta",
|
Type: "message_delta",
|
||||||
Usage: &dto.ClaudeUsage{
|
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
|
||||||
InputTokens: oaiUsage.PromptTokens,
|
|
||||||
OutputTokens: oaiUsage.CompletionTokens,
|
|
||||||
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
|
||||||
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
|
||||||
},
|
|
||||||
Delta: &dto.ClaudeMediaMessage{
|
Delta: &dto.ClaudeMediaMessage{
|
||||||
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
||||||
},
|
},
|
||||||
@@ -419,13 +433,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
|
|||||||
oaiUsage := info.ClaudeConvertInfo.Usage
|
oaiUsage := info.ClaudeConvertInfo.Usage
|
||||||
if oaiUsage != nil {
|
if oaiUsage != nil {
|
||||||
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
||||||
Type: "message_delta",
|
Type: "message_delta",
|
||||||
Usage: &dto.ClaudeUsage{
|
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
|
||||||
InputTokens: oaiUsage.PromptTokens,
|
|
||||||
OutputTokens: oaiUsage.CompletionTokens,
|
|
||||||
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
|
||||||
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
|
||||||
},
|
|
||||||
Delta: &dto.ClaudeMediaMessage{
|
Delta: &dto.ClaudeMediaMessage{
|
||||||
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
||||||
},
|
},
|
||||||
@@ -555,13 +564,8 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
|
|||||||
}
|
}
|
||||||
if oaiUsage != nil {
|
if oaiUsage != nil {
|
||||||
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
|
||||||
Type: "message_delta",
|
Type: "message_delta",
|
||||||
Usage: &dto.ClaudeUsage{
|
Usage: buildClaudeUsageFromOpenAIUsage(oaiUsage),
|
||||||
InputTokens: oaiUsage.PromptTokens,
|
|
||||||
OutputTokens: oaiUsage.CompletionTokens,
|
|
||||||
CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
|
|
||||||
CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens,
|
|
||||||
},
|
|
||||||
Delta: &dto.ClaudeMediaMessage{
|
Delta: &dto.ClaudeMediaMessage{
|
||||||
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -73,6 +73,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
|
|||||||
other["admin_info"] = adminInfo
|
other["admin_info"] = adminInfo
|
||||||
appendRequestPath(ctx, relayInfo, other)
|
appendRequestPath(ctx, relayInfo, other)
|
||||||
appendRequestConversionChain(relayInfo, other)
|
appendRequestConversionChain(relayInfo, other)
|
||||||
|
appendFinalRequestFormat(relayInfo, other)
|
||||||
appendBillingInfo(relayInfo, other)
|
appendBillingInfo(relayInfo, other)
|
||||||
appendParamOverrideInfo(relayInfo, other)
|
appendParamOverrideInfo(relayInfo, other)
|
||||||
return other
|
return other
|
||||||
@@ -167,6 +168,17 @@ func appendRequestConversionChain(relayInfo *relaycommon.RelayInfo, other map[st
|
|||||||
other["request_conversion"] = chain
|
other["request_conversion"] = chain
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func appendFinalRequestFormat(relayInfo *relaycommon.RelayInfo, other map[string]interface{}) {
|
||||||
|
if relayInfo == nil || other == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
|
||||||
|
// claude indicates the final upstream request format is Claude Messages.
|
||||||
|
// Frontend log rendering uses this to keep the original Claude input display.
|
||||||
|
other["claude"] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice, userGroupRatio float64) map[string]interface{} {
|
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice, userGroupRatio float64) map[string]interface{} {
|
||||||
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice, userGroupRatio)
|
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice, userGroupRatio)
|
||||||
info["ws"] = true
|
info["ws"] = true
|
||||||
|
|||||||
102
service/quota.go
102
service/quota.go
@@ -235,108 +235,6 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) {
|
|
||||||
if usage != nil {
|
|
||||||
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
|
|
||||||
}
|
|
||||||
|
|
||||||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
|
||||||
promptTokens := usage.PromptTokens
|
|
||||||
completionTokens := usage.CompletionTokens
|
|
||||||
modelName := relayInfo.OriginModelName
|
|
||||||
|
|
||||||
tokenName := ctx.GetString("token_name")
|
|
||||||
completionRatio := relayInfo.PriceData.CompletionRatio
|
|
||||||
modelRatio := relayInfo.PriceData.ModelRatio
|
|
||||||
groupRatio := relayInfo.PriceData.GroupRatioInfo.GroupRatio
|
|
||||||
modelPrice := relayInfo.PriceData.ModelPrice
|
|
||||||
cacheRatio := relayInfo.PriceData.CacheRatio
|
|
||||||
cacheTokens := usage.PromptTokensDetails.CachedTokens
|
|
||||||
|
|
||||||
cacheCreationRatio := relayInfo.PriceData.CacheCreationRatio
|
|
||||||
cacheCreationRatio5m := relayInfo.PriceData.CacheCreation5mRatio
|
|
||||||
cacheCreationRatio1h := relayInfo.PriceData.CacheCreation1hRatio
|
|
||||||
cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
|
|
||||||
cacheCreationTokens5m := usage.ClaudeCacheCreation5mTokens
|
|
||||||
cacheCreationTokens1h := usage.ClaudeCacheCreation1hTokens
|
|
||||||
|
|
||||||
if relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
|
|
||||||
promptTokens -= cacheTokens
|
|
||||||
isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(modelName, relayInfo.PriceData.ModelRatio)
|
|
||||||
if cacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
|
|
||||||
maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData)
|
|
||||||
if maybeCacheCreationTokens >= 0 && promptTokens >= maybeCacheCreationTokens {
|
|
||||||
cacheCreationTokens = maybeCacheCreationTokens
|
|
||||||
}
|
|
||||||
}
|
|
||||||
promptTokens -= cacheCreationTokens
|
|
||||||
}
|
|
||||||
|
|
||||||
calculateQuota := 0.0
|
|
||||||
if !relayInfo.PriceData.UsePrice {
|
|
||||||
calculateQuota = float64(promptTokens)
|
|
||||||
calculateQuota += float64(cacheTokens) * cacheRatio
|
|
||||||
calculateQuota += float64(cacheCreationTokens5m) * cacheCreationRatio5m
|
|
||||||
calculateQuota += float64(cacheCreationTokens1h) * cacheCreationRatio1h
|
|
||||||
remainingCacheCreationTokens := cacheCreationTokens - cacheCreationTokens5m - cacheCreationTokens1h
|
|
||||||
if remainingCacheCreationTokens > 0 {
|
|
||||||
calculateQuota += float64(remainingCacheCreationTokens) * cacheCreationRatio
|
|
||||||
}
|
|
||||||
calculateQuota += float64(completionTokens) * completionRatio
|
|
||||||
calculateQuota = calculateQuota * groupRatio * modelRatio
|
|
||||||
} else {
|
|
||||||
calculateQuota = modelPrice * common.QuotaPerUnit * groupRatio
|
|
||||||
}
|
|
||||||
|
|
||||||
if modelRatio != 0 && calculateQuota <= 0 {
|
|
||||||
calculateQuota = 1
|
|
||||||
}
|
|
||||||
|
|
||||||
quota := int(calculateQuota)
|
|
||||||
|
|
||||||
totalTokens := promptTokens + completionTokens
|
|
||||||
|
|
||||||
var logContent string
|
|
||||||
// record all the consume log even if quota is 0
|
|
||||||
if totalTokens == 0 {
|
|
||||||
// in this case, must be some error happened
|
|
||||||
// we cannot just return, because we may have to return the pre-consumed quota
|
|
||||||
quota = 0
|
|
||||||
logContent += fmt.Sprintf("(可能是上游出错)")
|
|
||||||
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
|
||||||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, relayInfo.FinalPreConsumedQuota))
|
|
||||||
} else {
|
|
||||||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
|
||||||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := SettleBilling(ctx, relayInfo, quota); err != nil {
|
|
||||||
logger.LogError(ctx, "error settling billing: "+err.Error())
|
|
||||||
}
|
|
||||||
|
|
||||||
other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio,
|
|
||||||
cacheTokens, cacheRatio,
|
|
||||||
cacheCreationTokens, cacheCreationRatio,
|
|
||||||
cacheCreationTokens5m, cacheCreationRatio5m,
|
|
||||||
cacheCreationTokens1h, cacheCreationRatio1h,
|
|
||||||
modelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
|
|
||||||
model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
|
|
||||||
ChannelId: relayInfo.ChannelId,
|
|
||||||
PromptTokens: promptTokens,
|
|
||||||
CompletionTokens: completionTokens,
|
|
||||||
ModelName: modelName,
|
|
||||||
TokenName: tokenName,
|
|
||||||
Quota: quota,
|
|
||||||
Content: logContent,
|
|
||||||
TokenId: relayInfo.TokenId,
|
|
||||||
UseTimeSeconds: int(useTimeSeconds),
|
|
||||||
IsStream: relayInfo.IsStream,
|
|
||||||
Group: relayInfo.UsingGroup,
|
|
||||||
Other: other,
|
|
||||||
})
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData types.PriceData) int {
|
func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData types.PriceData) int {
|
||||||
if priceData.CacheCreationRatio == 1 {
|
if priceData.CacheCreationRatio == 1 {
|
||||||
return 0
|
return 0
|
||||||
|
|||||||
427
service/text_quota.go
Normal file
427
service/text_quota.go
Normal file
@@ -0,0 +1,427 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"strings"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/QuantumNous/new-api/common"
|
||||||
|
"github.com/QuantumNous/new-api/constant"
|
||||||
|
"github.com/QuantumNous/new-api/dto"
|
||||||
|
"github.com/QuantumNous/new-api/logger"
|
||||||
|
"github.com/QuantumNous/new-api/model"
|
||||||
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||||
|
"github.com/QuantumNous/new-api/setting/operation_setting"
|
||||||
|
"github.com/QuantumNous/new-api/types"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/shopspring/decimal"
|
||||||
|
)
|
||||||
|
|
||||||
|
// textQuotaSummary is the result of calculateTextQuotaSummary: the token
// counts, pricing ratios, per-tool prices and final quota computed for one
// text request.
type textQuotaSummary struct {
	// Token counts taken from the usage record. For OpenRouter channels
	// PromptTokens is reduced by the cache read and cache creation tokens.
	PromptTokens          int
	CompletionTokens      int
	TotalTokens           int // usage prompt + completion tokens
	CacheTokens           int // cached (cache read) prompt tokens
	CacheCreationTokens   int // cache creation tokens, reported or derived
	CacheCreationTokens5m int
	CacheCreationTokens1h int
	ImageTokens           int
	AudioTokens           int

	// Request identity and timing.
	ModelName      string // relayInfo.OriginModelName
	TokenName      string // "token_name" from the gin context
	UseTimeSeconds int64  // whole seconds since relayInfo.StartTime

	// Pricing ratios copied from relayInfo.PriceData.
	CompletionRatio      float64
	CacheRatio           float64
	ImageRatio           float64
	ModelRatio           float64
	GroupRatio           float64
	ModelPrice           float64
	CacheCreationRatio   float64
	CacheCreationRatio5m float64
	CacheCreationRatio1h float64

	// Quota is the final rounded quota to charge.
	Quota int

	// UsageSemantic is the value returned by usageSemanticFromUsage;
	// IsClaudeUsageSemantic is true when it equals "anthropic".
	IsClaudeUsageSemantic bool
	UsageSemantic         string

	// Built-in tool and add-on pricing; prices stay zero when the
	// corresponding feature was not used.
	WebSearchPrice           float64
	WebSearchCallCount       int
	ClaudeWebSearchPrice     float64
	ClaudeWebSearchCallCount int
	FileSearchPrice          float64
	FileSearchCallCount      int
	AudioInputPrice          float64
	ImageGenerationCallPrice float64
}
|
||||||
|
|
||||||
|
func cacheWriteTokensTotal(summary textQuotaSummary) int {
|
||||||
|
if summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0 {
|
||||||
|
splitCacheWriteTokens := summary.CacheCreationTokens5m + summary.CacheCreationTokens1h
|
||||||
|
if summary.CacheCreationTokens > splitCacheWriteTokens {
|
||||||
|
return summary.CacheCreationTokens
|
||||||
|
}
|
||||||
|
return splitCacheWriteTokens
|
||||||
|
}
|
||||||
|
return summary.CacheCreationTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
func isLegacyClaudeDerivedOpenAIUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) bool {
|
||||||
|
if relayInfo == nil || usage == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if usage.UsageSource != "" || usage.UsageSemantic != "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return usage.ClaudeCacheCreation5mTokens > 0 || usage.ClaudeCacheCreation1hTokens > 0
|
||||||
|
}
|
||||||
|
|
||||||
|
func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) textQuotaSummary {
|
||||||
|
summary := textQuotaSummary{
|
||||||
|
ModelName: relayInfo.OriginModelName,
|
||||||
|
TokenName: ctx.GetString("token_name"),
|
||||||
|
UseTimeSeconds: time.Now().Unix() - relayInfo.StartTime.Unix(),
|
||||||
|
CompletionRatio: relayInfo.PriceData.CompletionRatio,
|
||||||
|
CacheRatio: relayInfo.PriceData.CacheRatio,
|
||||||
|
ImageRatio: relayInfo.PriceData.ImageRatio,
|
||||||
|
ModelRatio: relayInfo.PriceData.ModelRatio,
|
||||||
|
GroupRatio: relayInfo.PriceData.GroupRatioInfo.GroupRatio,
|
||||||
|
ModelPrice: relayInfo.PriceData.ModelPrice,
|
||||||
|
CacheCreationRatio: relayInfo.PriceData.CacheCreationRatio,
|
||||||
|
CacheCreationRatio5m: relayInfo.PriceData.CacheCreation5mRatio,
|
||||||
|
CacheCreationRatio1h: relayInfo.PriceData.CacheCreation1hRatio,
|
||||||
|
UsageSemantic: usageSemanticFromUsage(relayInfo, usage),
|
||||||
|
}
|
||||||
|
summary.IsClaudeUsageSemantic = summary.UsageSemantic == "anthropic"
|
||||||
|
|
||||||
|
if usage == nil {
|
||||||
|
usage = &dto.Usage{
|
||||||
|
PromptTokens: relayInfo.GetEstimatePromptTokens(),
|
||||||
|
CompletionTokens: 0,
|
||||||
|
TotalTokens: relayInfo.GetEstimatePromptTokens(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
summary.PromptTokens = usage.PromptTokens
|
||||||
|
summary.CompletionTokens = usage.CompletionTokens
|
||||||
|
summary.TotalTokens = usage.PromptTokens + usage.CompletionTokens
|
||||||
|
summary.CacheTokens = usage.PromptTokensDetails.CachedTokens
|
||||||
|
summary.CacheCreationTokens = usage.PromptTokensDetails.CachedCreationTokens
|
||||||
|
summary.CacheCreationTokens5m = usage.ClaudeCacheCreation5mTokens
|
||||||
|
summary.CacheCreationTokens1h = usage.ClaudeCacheCreation1hTokens
|
||||||
|
summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
|
||||||
|
summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
|
||||||
|
legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
|
||||||
|
|
||||||
|
if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
|
||||||
|
summary.PromptTokens -= summary.CacheTokens
|
||||||
|
isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
|
||||||
|
if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
|
||||||
|
maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, relayInfo.PriceData)
|
||||||
|
if maybeCacheCreationTokens >= 0 && summary.PromptTokens >= maybeCacheCreationTokens {
|
||||||
|
summary.CacheCreationTokens = maybeCacheCreationTokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
summary.PromptTokens -= summary.CacheCreationTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
dPromptTokens := decimal.NewFromInt(int64(summary.PromptTokens))
|
||||||
|
dCacheTokens := decimal.NewFromInt(int64(summary.CacheTokens))
|
||||||
|
dImageTokens := decimal.NewFromInt(int64(summary.ImageTokens))
|
||||||
|
dAudioTokens := decimal.NewFromInt(int64(summary.AudioTokens))
|
||||||
|
dCompletionTokens := decimal.NewFromInt(int64(summary.CompletionTokens))
|
||||||
|
dCachedCreationTokens := decimal.NewFromInt(int64(summary.CacheCreationTokens))
|
||||||
|
dCompletionRatio := decimal.NewFromFloat(summary.CompletionRatio)
|
||||||
|
dCacheRatio := decimal.NewFromFloat(summary.CacheRatio)
|
||||||
|
dImageRatio := decimal.NewFromFloat(summary.ImageRatio)
|
||||||
|
dModelRatio := decimal.NewFromFloat(summary.ModelRatio)
|
||||||
|
dGroupRatio := decimal.NewFromFloat(summary.GroupRatio)
|
||||||
|
dModelPrice := decimal.NewFromFloat(summary.ModelPrice)
|
||||||
|
dCacheCreationRatio := decimal.NewFromFloat(summary.CacheCreationRatio)
|
||||||
|
dCacheCreationRatio5m := decimal.NewFromFloat(summary.CacheCreationRatio5m)
|
||||||
|
dCacheCreationRatio1h := decimal.NewFromFloat(summary.CacheCreationRatio1h)
|
||||||
|
dQuotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
|
||||||
|
|
||||||
|
ratio := dModelRatio.Mul(dGroupRatio)
|
||||||
|
|
||||||
|
var dWebSearchQuota decimal.Decimal
|
||||||
|
if relayInfo.ResponsesUsageInfo != nil {
|
||||||
|
if webSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolWebSearchPreview]; exists && webSearchTool.CallCount > 0 {
|
||||||
|
summary.WebSearchCallCount = webSearchTool.CallCount
|
||||||
|
summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, webSearchTool.SearchContextSize)
|
||||||
|
dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice).
|
||||||
|
Mul(decimal.NewFromInt(int64(webSearchTool.CallCount))).
|
||||||
|
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||||
|
}
|
||||||
|
} else if strings.HasSuffix(summary.ModelName, "search-preview") {
|
||||||
|
searchContextSize := ctx.GetString("chat_completion_web_search_context_size")
|
||||||
|
if searchContextSize == "" {
|
||||||
|
searchContextSize = "medium"
|
||||||
|
}
|
||||||
|
summary.WebSearchCallCount = 1
|
||||||
|
summary.WebSearchPrice = operation_setting.GetWebSearchPricePerThousand(summary.ModelName, searchContextSize)
|
||||||
|
dWebSearchQuota = decimal.NewFromFloat(summary.WebSearchPrice).
|
||||||
|
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||||
|
}
|
||||||
|
|
||||||
|
var dClaudeWebSearchQuota decimal.Decimal
|
||||||
|
summary.ClaudeWebSearchCallCount = ctx.GetInt("claude_web_search_requests")
|
||||||
|
if summary.ClaudeWebSearchCallCount > 0 {
|
||||||
|
summary.ClaudeWebSearchPrice = operation_setting.GetClaudeWebSearchPricePerThousand()
|
||||||
|
dClaudeWebSearchQuota = decimal.NewFromFloat(summary.ClaudeWebSearchPrice).
|
||||||
|
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit).
|
||||||
|
Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount)))
|
||||||
|
}
|
||||||
|
|
||||||
|
var dFileSearchQuota decimal.Decimal
|
||||||
|
if relayInfo.ResponsesUsageInfo != nil {
|
||||||
|
if fileSearchTool, exists := relayInfo.ResponsesUsageInfo.BuiltInTools[dto.BuildInToolFileSearch]; exists && fileSearchTool.CallCount > 0 {
|
||||||
|
summary.FileSearchCallCount = fileSearchTool.CallCount
|
||||||
|
summary.FileSearchPrice = operation_setting.GetFileSearchPricePerThousand()
|
||||||
|
dFileSearchQuota = decimal.NewFromFloat(summary.FileSearchPrice).
|
||||||
|
Mul(decimal.NewFromInt(int64(fileSearchTool.CallCount))).
|
||||||
|
Div(decimal.NewFromInt(1000)).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var dImageGenerationCallQuota decimal.Decimal
|
||||||
|
if ctx.GetBool("image_generation_call") {
|
||||||
|
summary.ImageGenerationCallPrice = operation_setting.GetGPTImage1PriceOnceCall(ctx.GetString("image_generation_call_quality"), ctx.GetString("image_generation_call_size"))
|
||||||
|
dImageGenerationCallQuota = decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||||
|
}
|
||||||
|
|
||||||
|
var audioInputQuota decimal.Decimal
|
||||||
|
if !relayInfo.PriceData.UsePrice {
|
||||||
|
baseTokens := dPromptTokens
|
||||||
|
|
||||||
|
var cachedTokensWithRatio decimal.Decimal
|
||||||
|
if !dCacheTokens.IsZero() {
|
||||||
|
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
|
||||||
|
baseTokens = baseTokens.Sub(dCacheTokens)
|
||||||
|
}
|
||||||
|
cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
|
||||||
|
}
|
||||||
|
|
||||||
|
var cachedCreationTokensWithRatio decimal.Decimal
|
||||||
|
hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0
|
||||||
|
if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens {
|
||||||
|
if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
|
||||||
|
baseTokens = baseTokens.Sub(dCachedCreationTokens)
|
||||||
|
cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio)
|
||||||
|
} else {
|
||||||
|
remaining := summary.CacheCreationTokens - summary.CacheCreationTokens5m - summary.CacheCreationTokens1h
|
||||||
|
if remaining < 0 {
|
||||||
|
remaining = 0
|
||||||
|
}
|
||||||
|
cachedCreationTokensWithRatio = decimal.NewFromInt(int64(remaining)).Mul(dCacheCreationRatio)
|
||||||
|
cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens5m)).Mul(dCacheCreationRatio5m))
|
||||||
|
cachedCreationTokensWithRatio = cachedCreationTokensWithRatio.Add(decimal.NewFromInt(int64(summary.CacheCreationTokens1h)).Mul(dCacheCreationRatio1h))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var imageTokensWithRatio decimal.Decimal
|
||||||
|
if !dImageTokens.IsZero() {
|
||||||
|
baseTokens = baseTokens.Sub(dImageTokens)
|
||||||
|
imageTokensWithRatio = dImageTokens.Mul(dImageRatio)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !dAudioTokens.IsZero() {
|
||||||
|
summary.AudioInputPrice = operation_setting.GetGeminiInputAudioPricePerMillionTokens(summary.ModelName)
|
||||||
|
if summary.AudioInputPrice > 0 {
|
||||||
|
baseTokens = baseTokens.Sub(dAudioTokens)
|
||||||
|
audioInputQuota = decimal.NewFromFloat(summary.AudioInputPrice).
|
||||||
|
Div(decimal.NewFromInt(1000000)).Mul(dAudioTokens).Mul(dGroupRatio).Mul(dQuotaPerUnit)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
promptQuota := baseTokens.Add(cachedTokensWithRatio).Add(imageTokensWithRatio).Add(cachedCreationTokensWithRatio)
|
||||||
|
completionQuota := dCompletionTokens.Mul(dCompletionRatio)
|
||||||
|
quotaCalculateDecimal := promptQuota.Add(completionQuota).Mul(ratio)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
|
||||||
|
|
||||||
|
if len(relayInfo.PriceData.OtherRatios) > 0 {
|
||||||
|
for _, otherRatio := range relayInfo.PriceData.OtherRatios {
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !ratio.IsZero() && quotaCalculateDecimal.LessThanOrEqual(decimal.Zero) {
|
||||||
|
quotaCalculateDecimal = decimal.NewFromInt(1)
|
||||||
|
}
|
||||||
|
summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart())
|
||||||
|
} else {
|
||||||
|
quotaCalculateDecimal := dModelPrice.Mul(dQuotaPerUnit).Mul(dGroupRatio)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(dWebSearchQuota)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(dClaudeWebSearchQuota)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(dFileSearchQuota)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(audioInputQuota)
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Add(dImageGenerationCallQuota)
|
||||||
|
if len(relayInfo.PriceData.OtherRatios) > 0 {
|
||||||
|
for _, otherRatio := range relayInfo.PriceData.OtherRatios {
|
||||||
|
quotaCalculateDecimal = quotaCalculateDecimal.Mul(decimal.NewFromFloat(otherRatio))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
summary.Quota = int(quotaCalculateDecimal.Round(0).IntPart())
|
||||||
|
}
|
||||||
|
|
||||||
|
if summary.TotalTokens == 0 {
|
||||||
|
summary.Quota = 0
|
||||||
|
} else if !ratio.IsZero() && summary.Quota == 0 {
|
||||||
|
summary.Quota = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
return summary
|
||||||
|
}
|
||||||
|
|
||||||
|
func usageSemanticFromUsage(relayInfo *relaycommon.RelayInfo, usage *dto.Usage) string {
|
||||||
|
if usage != nil && usage.UsageSemantic != "" {
|
||||||
|
return usage.UsageSemantic
|
||||||
|
}
|
||||||
|
if relayInfo != nil && relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude {
|
||||||
|
return "anthropic"
|
||||||
|
}
|
||||||
|
return "openai"
|
||||||
|
}
|
||||||
|
|
||||||
|
func PostTextConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, extraContent []string) {
|
||||||
|
originUsage := usage
|
||||||
|
if usage == nil {
|
||||||
|
extraContent = append(extraContent, "上游无计费信息")
|
||||||
|
}
|
||||||
|
if originUsage != nil {
|
||||||
|
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
|
||||||
|
}
|
||||||
|
|
||||||
|
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
|
||||||
|
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||||
|
|
||||||
|
if summary.WebSearchCallCount > 0 {
|
||||||
|
extraContent = append(extraContent, fmt.Sprintf("Web Search 调用 %d 次,调用花费 %s", summary.WebSearchCallCount, decimal.NewFromFloat(summary.WebSearchPrice).Mul(decimal.NewFromInt(int64(summary.WebSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
|
||||||
|
}
|
||||||
|
if summary.ClaudeWebSearchCallCount > 0 {
|
||||||
|
extraContent = append(extraContent, fmt.Sprintf("Claude Web Search 调用 %d 次,调用花费 %s", summary.ClaudeWebSearchCallCount, decimal.NewFromFloat(summary.ClaudeWebSearchPrice).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).Mul(decimal.NewFromInt(int64(summary.ClaudeWebSearchCallCount))).String()))
|
||||||
|
}
|
||||||
|
if summary.FileSearchCallCount > 0 {
|
||||||
|
extraContent = append(extraContent, fmt.Sprintf("File Search 调用 %d 次,调用花费 %s", summary.FileSearchCallCount, decimal.NewFromFloat(summary.FileSearchPrice).Mul(decimal.NewFromInt(int64(summary.FileSearchCallCount))).Div(decimal.NewFromInt(1000)).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
|
||||||
|
}
|
||||||
|
if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 {
|
||||||
|
extraContent = append(extraContent, fmt.Sprintf("Audio Input 花费 %s", decimal.NewFromFloat(summary.AudioInputPrice).Div(decimal.NewFromInt(1000000)).Mul(decimal.NewFromInt(int64(summary.AudioTokens))).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
|
||||||
|
}
|
||||||
|
if summary.ImageGenerationCallPrice > 0 {
|
||||||
|
extraContent = append(extraContent, fmt.Sprintf("Image Generation Call 花费 %s", decimal.NewFromFloat(summary.ImageGenerationCallPrice).Mul(decimal.NewFromFloat(summary.GroupRatio)).Mul(decimal.NewFromFloat(common.QuotaPerUnit)).String()))
|
||||||
|
}
|
||||||
|
|
||||||
|
if summary.TotalTokens == 0 {
|
||||||
|
extraContent = append(extraContent, "上游没有返回计费信息,无法扣费(可能是上游超时)")
|
||||||
|
logger.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, summary.ModelName, relayInfo.FinalPreConsumedQuota))
|
||||||
|
} else {
|
||||||
|
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, summary.Quota)
|
||||||
|
model.UpdateChannelUsedQuota(relayInfo.ChannelId, summary.Quota)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := SettleBilling(ctx, relayInfo, summary.Quota); err != nil {
|
||||||
|
logger.LogError(ctx, "error settling billing: "+err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
logModel := summary.ModelName
|
||||||
|
if strings.HasPrefix(logModel, "gpt-4-gizmo") {
|
||||||
|
logModel = "gpt-4-gizmo-*"
|
||||||
|
extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName))
|
||||||
|
}
|
||||||
|
if strings.HasPrefix(logModel, "gpt-4o-gizmo") {
|
||||||
|
logModel = "gpt-4o-gizmo-*"
|
||||||
|
extraContent = append(extraContent, fmt.Sprintf("模型 %s", summary.ModelName))
|
||||||
|
}
|
||||||
|
|
||||||
|
logContent := strings.Join(extraContent, ", ")
|
||||||
|
var other map[string]interface{}
|
||||||
|
if summary.IsClaudeUsageSemantic {
|
||||||
|
other = GenerateClaudeOtherInfo(ctx, relayInfo,
|
||||||
|
summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio,
|
||||||
|
summary.CacheTokens, summary.CacheRatio,
|
||||||
|
summary.CacheCreationTokens, summary.CacheCreationRatio,
|
||||||
|
summary.CacheCreationTokens5m, summary.CacheCreationRatio5m,
|
||||||
|
summary.CacheCreationTokens1h, summary.CacheCreationRatio1h,
|
||||||
|
summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
|
||||||
|
other["usage_semantic"] = "anthropic"
|
||||||
|
} else {
|
||||||
|
other = GenerateTextOtherInfo(ctx, relayInfo, summary.ModelRatio, summary.GroupRatio, summary.CompletionRatio, summary.CacheTokens, summary.CacheRatio, summary.ModelPrice, relayInfo.PriceData.GroupRatioInfo.GroupSpecialRatio)
|
||||||
|
}
|
||||||
|
if adminRejectReason != "" {
|
||||||
|
other["reject_reason"] = adminRejectReason
|
||||||
|
}
|
||||||
|
if summary.ImageTokens != 0 {
|
||||||
|
other["image"] = true
|
||||||
|
other["image_ratio"] = summary.ImageRatio
|
||||||
|
other["image_output"] = summary.ImageTokens
|
||||||
|
}
|
||||||
|
if summary.WebSearchCallCount > 0 {
|
||||||
|
other["web_search"] = true
|
||||||
|
other["web_search_call_count"] = summary.WebSearchCallCount
|
||||||
|
other["web_search_price"] = summary.WebSearchPrice
|
||||||
|
} else if summary.ClaudeWebSearchCallCount > 0 {
|
||||||
|
other["web_search"] = true
|
||||||
|
other["web_search_call_count"] = summary.ClaudeWebSearchCallCount
|
||||||
|
other["web_search_price"] = summary.ClaudeWebSearchPrice
|
||||||
|
}
|
||||||
|
if summary.FileSearchCallCount > 0 {
|
||||||
|
other["file_search"] = true
|
||||||
|
other["file_search_call_count"] = summary.FileSearchCallCount
|
||||||
|
other["file_search_price"] = summary.FileSearchPrice
|
||||||
|
}
|
||||||
|
if summary.AudioInputPrice > 0 && summary.AudioTokens > 0 {
|
||||||
|
other["audio_input_seperate_price"] = true
|
||||||
|
other["audio_input_token_count"] = summary.AudioTokens
|
||||||
|
other["audio_input_price"] = summary.AudioInputPrice
|
||||||
|
}
|
||||||
|
if summary.ImageGenerationCallPrice > 0 {
|
||||||
|
other["image_generation_call"] = true
|
||||||
|
other["image_generation_call_price"] = summary.ImageGenerationCallPrice
|
||||||
|
}
|
||||||
|
if summary.CacheCreationTokens > 0 {
|
||||||
|
other["cache_creation_tokens"] = summary.CacheCreationTokens
|
||||||
|
other["cache_creation_ratio"] = summary.CacheCreationRatio
|
||||||
|
}
|
||||||
|
if summary.CacheCreationTokens5m > 0 {
|
||||||
|
other["cache_creation_tokens_5m"] = summary.CacheCreationTokens5m
|
||||||
|
other["cache_creation_ratio_5m"] = summary.CacheCreationRatio5m
|
||||||
|
}
|
||||||
|
if summary.CacheCreationTokens1h > 0 {
|
||||||
|
other["cache_creation_tokens_1h"] = summary.CacheCreationTokens1h
|
||||||
|
other["cache_creation_ratio_1h"] = summary.CacheCreationRatio1h
|
||||||
|
}
|
||||||
|
cacheWriteTokens := cacheWriteTokensTotal(summary)
|
||||||
|
if cacheWriteTokens > 0 {
|
||||||
|
// cache_write_tokens: normalized cache creation total for UI display.
|
||||||
|
// If split 5m/1h values are present, this is their sum; otherwise it falls back
|
||||||
|
// to cache_creation_tokens.
|
||||||
|
other["cache_write_tokens"] = cacheWriteTokens
|
||||||
|
}
|
||||||
|
if relayInfo.GetFinalRequestRelayFormat() != types.RelayFormatClaude && usage != nil && usage.UsageSource != "" && usage.InputTokens > 0 {
|
||||||
|
// input_tokens_total: explicit normalized total input used by the usage log UI.
|
||||||
|
// Only write this field when upstream/current conversion has already provided a
|
||||||
|
// reliable total input value and tagged the usage source. Do not infer it from
|
||||||
|
// prompt/cache fields here, otherwise old upstream payloads may be double-counted.
|
||||||
|
other["input_tokens_total"] = usage.InputTokens
|
||||||
|
}
|
||||||
|
|
||||||
|
model.RecordConsumeLog(ctx, relayInfo.UserId, model.RecordConsumeLogParams{
|
||||||
|
ChannelId: relayInfo.ChannelId,
|
||||||
|
PromptTokens: summary.PromptTokens,
|
||||||
|
CompletionTokens: summary.CompletionTokens,
|
||||||
|
ModelName: logModel,
|
||||||
|
TokenName: summary.TokenName,
|
||||||
|
Quota: summary.Quota,
|
||||||
|
Content: logContent,
|
||||||
|
TokenId: relayInfo.TokenId,
|
||||||
|
UseTimeSeconds: int(summary.UseTimeSeconds),
|
||||||
|
IsStream: relayInfo.IsStream,
|
||||||
|
Group: relayInfo.UsingGroup,
|
||||||
|
Other: other,
|
||||||
|
})
|
||||||
|
}
|
||||||
206
service/text_quota_test.go
Normal file
206
service/text_quota_test.go
Normal file
@@ -0,0 +1,206 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/QuantumNous/new-api/dto"
|
||||||
|
relaycommon "github.com/QuantumNous/new-api/relay/common"
|
||||||
|
"github.com/QuantumNous/new-api/types"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestCalculateTextQuotaSummaryUnifiedForClaudeSemantic(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
ctx, _ := gin.CreateTestContext(w)
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 1000,
|
||||||
|
CompletionTokens: 200,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 100,
|
||||||
|
CachedCreationTokens: 50,
|
||||||
|
},
|
||||||
|
ClaudeCacheCreation5mTokens: 10,
|
||||||
|
ClaudeCacheCreation1hTokens: 20,
|
||||||
|
}
|
||||||
|
|
||||||
|
priceData := types.PriceData{
|
||||||
|
ModelRatio: 1,
|
||||||
|
CompletionRatio: 2,
|
||||||
|
CacheRatio: 0.1,
|
||||||
|
CacheCreationRatio: 1.25,
|
||||||
|
CacheCreation5mRatio: 1.25,
|
||||||
|
CacheCreation1hRatio: 2,
|
||||||
|
GroupRatioInfo: types.GroupRatioInfo{
|
||||||
|
GroupRatio: 1,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
chatRelayInfo := &relaycommon.RelayInfo{
|
||||||
|
RelayFormat: types.RelayFormatOpenAI,
|
||||||
|
FinalRequestRelayFormat: types.RelayFormatClaude,
|
||||||
|
OriginModelName: "claude-3-7-sonnet",
|
||||||
|
PriceData: priceData,
|
||||||
|
StartTime: time.Now(),
|
||||||
|
}
|
||||||
|
messageRelayInfo := &relaycommon.RelayInfo{
|
||||||
|
RelayFormat: types.RelayFormatClaude,
|
||||||
|
FinalRequestRelayFormat: types.RelayFormatClaude,
|
||||||
|
OriginModelName: "claude-3-7-sonnet",
|
||||||
|
PriceData: priceData,
|
||||||
|
StartTime: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
chatSummary := calculateTextQuotaSummary(ctx, chatRelayInfo, usage)
|
||||||
|
messageSummary := calculateTextQuotaSummary(ctx, messageRelayInfo, usage)
|
||||||
|
|
||||||
|
require.Equal(t, messageSummary.Quota, chatSummary.Quota)
|
||||||
|
require.Equal(t, messageSummary.CacheCreationTokens5m, chatSummary.CacheCreationTokens5m)
|
||||||
|
require.Equal(t, messageSummary.CacheCreationTokens1h, chatSummary.CacheCreationTokens1h)
|
||||||
|
require.True(t, chatSummary.IsClaudeUsageSemantic)
|
||||||
|
require.Equal(t, 1488, chatSummary.Quota)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateTextQuotaSummaryUsesSplitClaudeCacheCreationRatios(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
ctx, _ := gin.CreateTestContext(w)
|
||||||
|
|
||||||
|
relayInfo := &relaycommon.RelayInfo{
|
||||||
|
RelayFormat: types.RelayFormatOpenAI,
|
||||||
|
FinalRequestRelayFormat: types.RelayFormatClaude,
|
||||||
|
OriginModelName: "claude-3-7-sonnet",
|
||||||
|
PriceData: types.PriceData{
|
||||||
|
ModelRatio: 1,
|
||||||
|
CompletionRatio: 1,
|
||||||
|
CacheRatio: 0,
|
||||||
|
CacheCreationRatio: 1,
|
||||||
|
CacheCreation5mRatio: 2,
|
||||||
|
CacheCreation1hRatio: 3,
|
||||||
|
GroupRatioInfo: types.GroupRatioInfo{
|
||||||
|
GroupRatio: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
StartTime: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 100,
|
||||||
|
CompletionTokens: 0,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedCreationTokens: 10,
|
||||||
|
},
|
||||||
|
ClaudeCacheCreation5mTokens: 2,
|
||||||
|
ClaudeCacheCreation1hTokens: 3,
|
||||||
|
}
|
||||||
|
|
||||||
|
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||||
|
|
||||||
|
// 100 + remaining(5)*1 + 2*2 + 3*3 = 118
|
||||||
|
require.Equal(t, 118, summary.Quota)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateTextQuotaSummaryUsesAnthropicUsageSemanticFromUpstreamUsage(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
ctx, _ := gin.CreateTestContext(w)
|
||||||
|
|
||||||
|
relayInfo := &relaycommon.RelayInfo{
|
||||||
|
RelayFormat: types.RelayFormatOpenAI,
|
||||||
|
OriginModelName: "claude-3-7-sonnet",
|
||||||
|
PriceData: types.PriceData{
|
||||||
|
ModelRatio: 1,
|
||||||
|
CompletionRatio: 2,
|
||||||
|
CacheRatio: 0.1,
|
||||||
|
CacheCreationRatio: 1.25,
|
||||||
|
CacheCreation5mRatio: 1.25,
|
||||||
|
CacheCreation1hRatio: 2,
|
||||||
|
GroupRatioInfo: types.GroupRatioInfo{
|
||||||
|
GroupRatio: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
StartTime: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 1000,
|
||||||
|
CompletionTokens: 200,
|
||||||
|
UsageSemantic: "anthropic",
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 100,
|
||||||
|
CachedCreationTokens: 50,
|
||||||
|
},
|
||||||
|
ClaudeCacheCreation5mTokens: 10,
|
||||||
|
ClaudeCacheCreation1hTokens: 20,
|
||||||
|
}
|
||||||
|
|
||||||
|
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||||
|
|
||||||
|
require.True(t, summary.IsClaudeUsageSemantic)
|
||||||
|
require.Equal(t, "anthropic", summary.UsageSemantic)
|
||||||
|
require.Equal(t, 1488, summary.Quota)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCacheWriteTokensTotal(t *testing.T) {
|
||||||
|
t.Run("split cache creation", func(t *testing.T) {
|
||||||
|
summary := textQuotaSummary{
|
||||||
|
CacheCreationTokens: 50,
|
||||||
|
CacheCreationTokens5m: 10,
|
||||||
|
CacheCreationTokens1h: 20,
|
||||||
|
}
|
||||||
|
require.Equal(t, 50, cacheWriteTokensTotal(summary))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("legacy cache creation", func(t *testing.T) {
|
||||||
|
summary := textQuotaSummary{CacheCreationTokens: 50}
|
||||||
|
require.Equal(t, 50, cacheWriteTokensTotal(summary))
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("split cache creation without aggregate remainder", func(t *testing.T) {
|
||||||
|
summary := textQuotaSummary{
|
||||||
|
CacheCreationTokens5m: 10,
|
||||||
|
CacheCreationTokens1h: 20,
|
||||||
|
}
|
||||||
|
require.Equal(t, 30, cacheWriteTokensTotal(summary))
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testing.T) {
|
||||||
|
gin.SetMode(gin.TestMode)
|
||||||
|
w := httptest.NewRecorder()
|
||||||
|
ctx, _ := gin.CreateTestContext(w)
|
||||||
|
|
||||||
|
relayInfo := &relaycommon.RelayInfo{
|
||||||
|
RelayFormat: types.RelayFormatOpenAI,
|
||||||
|
OriginModelName: "claude-3-7-sonnet",
|
||||||
|
PriceData: types.PriceData{
|
||||||
|
ModelRatio: 1,
|
||||||
|
CompletionRatio: 5,
|
||||||
|
CacheRatio: 0.1,
|
||||||
|
CacheCreationRatio: 1.25,
|
||||||
|
CacheCreation5mRatio: 1.25,
|
||||||
|
CacheCreation1hRatio: 2,
|
||||||
|
GroupRatioInfo: types.GroupRatioInfo{GroupRatio: 1},
|
||||||
|
},
|
||||||
|
StartTime: time.Now(),
|
||||||
|
}
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 62,
|
||||||
|
CompletionTokens: 95,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 3544,
|
||||||
|
},
|
||||||
|
ClaudeCacheCreation5mTokens: 586,
|
||||||
|
}
|
||||||
|
|
||||||
|
summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
|
||||||
|
|
||||||
|
// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
|
||||||
|
require.Equal(t, 1624, summary.Quota)
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user