diff --git a/dto/openai_request.go b/dto/openai_request.go index 42c290ca..2a3263a1 100644 --- a/dto/openai_request.go +++ b/dto/openai_request.go @@ -57,6 +57,7 @@ type GeneralOpenAIRequest struct { ExtraBody json.RawMessage `json:"extra_body,omitempty"` WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"` // OpenRouter Params + Usage json.RawMessage `json:"usage,omitempty"` Reasoning json.RawMessage `json:"reasoning,omitempty"` // Ali Qwen Params VlHighResolutionImages json.RawMessage `json:"vl_high_resolution_images,omitempty"` diff --git a/dto/openai_response.go b/dto/openai_response.go index 6f6b12f9..d95acd9e 100644 --- a/dto/openai_response.go +++ b/dto/openai_response.go @@ -178,6 +178,8 @@ type Usage struct { InputTokens int `json:"input_tokens"` OutputTokens int `json:"output_tokens"` InputTokensDetails *InputTokenDetails `json:"input_tokens_details"` + // OpenRouter Params + Cost float64 `json:"cost,omitempty"` } type InputTokenDetails struct { diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index 8358f3e2..424fd3df 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -159,6 +159,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure { request.StreamOptions = nil } + if info.ChannelType == common.ChannelTypeOpenRouter { + if len(request.Usage) == 0 { + request.Usage = json.RawMessage(`{"include":true}`) + } + } if strings.HasPrefix(request.Model, "o") { if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 { request.MaxCompletionTokens = request.MaxTokens diff --git a/service/convert.go b/service/convert.go index 7a9e8403..df7acf0d 100644 --- a/service/convert.go +++ b/service/convert.go @@ -276,12 +276,15 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon } if info.Done { claudeResponses = append(claudeResponses, generateStopBlock(info.ClaudeConvertInfo.Index)) - if info.ClaudeConvertInfo.Usage != nil { + oaiUsage := info.ClaudeConvertInfo.Usage + if oaiUsage != nil { claudeResponses = append(claudeResponses, &dto.ClaudeResponse{ Type: "message_delta", Usage: &dto.ClaudeUsage{ - InputTokens: info.ClaudeConvertInfo.Usage.PromptTokens, - OutputTokens: info.ClaudeConvertInfo.Usage.CompletionTokens, + InputTokens: oaiUsage.PromptTokens, + OutputTokens: oaiUsage.CompletionTokens, + CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens, + CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens, }, Delta: &dto.ClaudeMediaMessage{ StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)), diff --git a/service/quota.go b/service/quota.go index 973deba7..8005a1fb 100644 --- a/service/quota.go +++ b/service/quota.go @@ -4,6 +4,7 @@ import ( "errors" "fmt" "log" + "math" "one-api/common" constant2 "one-api/constant" "one-api/dto" @@ -231,6 +232,17 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, cacheCreationRatio := priceData.CacheCreationRatio cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens + if relayInfo.ChannelType == common.ChannelTypeOpenRouter { + promptTokens -= cacheTokens + if cacheCreationTokens == 0 && priceData.CacheCreationRatio != 1 && usage.Cost != 0 { + maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, priceData) + if promptTokens >= maybeCacheCreationTokens { + cacheCreationTokens = maybeCacheCreationTokens + } + } + promptTokens -= cacheCreationTokens + } + calculateQuota := 0.0 if !priceData.UsePrice { calculateQuota = float64(promptTokens) @@ -278,6 +290,27 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other) } +func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData helper.PriceData) int { + if priceData.CacheCreationRatio == 1 { + return 0 + } + quotaPrice := priceData.ModelRatio / common.QuotaPerUnit + promptCacheCreatePrice := quotaPrice * priceData.CacheCreationRatio + promptCacheReadPrice := quotaPrice * priceData.CacheRatio + completionPrice := quotaPrice * priceData.CompletionRatio + + cost := usage.Cost + totalPromptTokens := float64(usage.PromptTokens) + completionTokens := float64(usage.CompletionTokens) + promptCacheReadTokens := float64(usage.PromptTokensDetails.CachedTokens) + + return int(math.Round((cost - + totalPromptTokens*quotaPrice + + promptCacheReadTokens*(quotaPrice-promptCacheReadPrice) - + completionTokens*completionPrice) / + (promptCacheCreatePrice - quotaPrice))) +} + func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, preConsumedQuota int, userQuota int, priceData helper.PriceData, extraContent string) {