From 66778efcc5599191c200ee723f49ba0be2af5b83 Mon Sep 17 00:00:00 2001 From: neotf <10400594+neotf@users.noreply.github.com> Date: Thu, 29 May 2025 00:49:21 +0800 Subject: [PATCH 1/7] feat: enhance token usage details for upstream OpenRouter --- dto/openai_request.go | 76 +++++++++++++++++---------------- relay/channel/openai/adaptor.go | 3 ++ service/convert.go | 9 ++-- 3 files changed, 48 insertions(+), 40 deletions(-) diff --git a/dto/openai_request.go b/dto/openai_request.go index bda1bb17..9e3a41ac 100644 --- a/dto/openai_request.go +++ b/dto/openai_request.go @@ -18,43 +18,45 @@ type FormatJsonSchema struct { } type GeneralOpenAIRequest struct { - Model string `json:"model,omitempty"` - Messages []Message `json:"messages,omitempty"` - Prompt any `json:"prompt,omitempty"` - Prefix any `json:"prefix,omitempty"` - Suffix any `json:"suffix,omitempty"` - Stream bool `json:"stream,omitempty"` - StreamOptions *StreamOptions `json:"stream_options,omitempty"` - MaxTokens uint `json:"max_tokens,omitempty"` - MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"` - ReasoningEffort string `json:"reasoning_effort,omitempty"` - //Reasoning json.RawMessage `json:"reasoning,omitempty"` - Temperature *float64 `json:"temperature,omitempty"` - TopP float64 `json:"top_p,omitempty"` - TopK int `json:"top_k,omitempty"` - Stop any `json:"stop,omitempty"` - N int `json:"n,omitempty"` - Input any `json:"input,omitempty"` - Instruction string `json:"instruction,omitempty"` - Size string `json:"size,omitempty"` - Functions any `json:"functions,omitempty"` - FrequencyPenalty float64 `json:"frequency_penalty,omitempty"` - PresencePenalty float64 `json:"presence_penalty,omitempty"` - ResponseFormat *ResponseFormat `json:"response_format,omitempty"` - EncodingFormat any `json:"encoding_format,omitempty"` - Seed float64 `json:"seed,omitempty"` - ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"` - Tools []ToolCallRequest `json:"tools,omitempty"` - ToolChoice any `json:"tool_choice,omitempty"` - User string `json:"user,omitempty"` - LogProbs bool `json:"logprobs,omitempty"` - TopLogProbs int `json:"top_logprobs,omitempty"` - Dimensions int `json:"dimensions,omitempty"` - Modalities any `json:"modalities,omitempty"` - Audio any `json:"audio,omitempty"` - EnableThinking any `json:"enable_thinking,omitempty"` // ali - ExtraBody any `json:"extra_body,omitempty"` - WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"` + Model string `json:"model,omitempty"` + Messages []Message `json:"messages,omitempty"` + Prompt any `json:"prompt,omitempty"` + Prefix any `json:"prefix,omitempty"` + Suffix any `json:"suffix,omitempty"` + Stream bool `json:"stream,omitempty"` + StreamOptions *StreamOptions `json:"stream_options,omitempty"` + MaxTokens uint `json:"max_tokens,omitempty"` + MaxCompletionTokens uint `json:"max_completion_tokens,omitempty"` + ReasoningEffort string `json:"reasoning_effort,omitempty"` + Temperature *float64 `json:"temperature,omitempty"` + TopP float64 `json:"top_p,omitempty"` + TopK int `json:"top_k,omitempty"` + Stop any `json:"stop,omitempty"` + N int `json:"n,omitempty"` + Input any `json:"input,omitempty"` + Instruction string `json:"instruction,omitempty"` + Size string `json:"size,omitempty"` + Functions any `json:"functions,omitempty"` + FrequencyPenalty float64 `json:"frequency_penalty,omitempty"` + PresencePenalty float64 `json:"presence_penalty,omitempty"` + ResponseFormat *ResponseFormat `json:"response_format,omitempty"` + EncodingFormat any `json:"encoding_format,omitempty"` + Seed float64 `json:"seed,omitempty"` + ParallelTooCalls *bool `json:"parallel_tool_calls,omitempty"` + Tools []ToolCallRequest `json:"tools,omitempty"` + ToolChoice any `json:"tool_choice,omitempty"` + User string `json:"user,omitempty"` + LogProbs bool `json:"logprobs,omitempty"` + TopLogProbs int `json:"top_logprobs,omitempty"` + Dimensions int `json:"dimensions,omitempty"` + Modalities any `json:"modalities,omitempty"` + Audio any `json:"audio,omitempty"` + EnableThinking any `json:"enable_thinking,omitempty"` // ali + ExtraBody any `json:"extra_body,omitempty"` + WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"` + // OpenRouter Params + Usage json.RawMessage `json:"usage,omitempty"` + Reasoning json.RawMessage `json:"reasoning,omitempty"` } type ToolCallRequest struct { diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index f0cf073f..cef958b2 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -152,6 +152,9 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure { request.StreamOptions = nil } + if info.ChannelType == common.ChannelTypeOpenRouter { + request.Usage = json.RawMessage("{\"include\": true}") + } if strings.HasPrefix(request.Model, "o") { if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 { request.MaxCompletionTokens = request.MaxTokens diff --git a/service/convert.go b/service/convert.go index cc462b40..67e77903 100644 --- a/service/convert.go +++ b/service/convert.go @@ -246,12 +246,15 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon } if info.Done { claudeResponses = append(claudeResponses, generateStopBlock(info.ClaudeConvertInfo.Index)) - if info.ClaudeConvertInfo.Usage != nil { + oaiUsage := info.ClaudeConvertInfo.Usage + if oaiUsage != nil { claudeResponses = append(claudeResponses, &dto.ClaudeResponse{ Type: "message_delta", Usage: &dto.ClaudeUsage{ - InputTokens: info.ClaudeConvertInfo.Usage.PromptTokens, - OutputTokens: info.ClaudeConvertInfo.Usage.CompletionTokens, + InputTokens: oaiUsage.PromptTokens, + OutputTokens: oaiUsage.CompletionTokens, + CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens, + CacheReadInputTokens: oaiUsage.PromptTokensDetails.CachedTokens, }, Delta: &dto.ClaudeMediaMessage{ StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)), From 3d9587f128a20b786c464a9f77ace143f4f426d8 Mon Sep 17 00:00:00 2001 From: neotf <10400594+neotf@users.noreply.github.com> Date: Thu, 29 May 2025 22:24:29 +0800 Subject: [PATCH 2/7] feat: enhance cache_create_tokens calculation for OpenRouter --- dto/openai_response.go | 2 ++ service/quota.go | 27 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/dto/openai_response.go b/dto/openai_response.go index 790d4df8..fb4aeb4c 100644 --- a/dto/openai_response.go +++ b/dto/openai_response.go @@ -178,6 +178,8 @@ type Usage struct { InputTokens int `json:"input_tokens"` OutputTokens int `json:"output_tokens"` InputTokensDetails *InputTokenDetails `json:"input_tokens_details"` + // OpenRouter Params + Cost float64 `json:"cost,omitempty"` } type InputTokenDetails struct { diff --git a/service/quota.go b/service/quota.go index 0d11b4a0..43297b4a 100644 --- a/service/quota.go +++ b/service/quota.go @@ -3,6 +3,7 @@ package service import ( "errors" "fmt" + "math" "one-api/common" constant2 "one-api/constant" "one-api/dto" @@ -214,6 +215,11 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, cacheCreationRatio := priceData.CacheCreationRatio cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens + if relayInfo.ChannelType == common.ChannelTypeOpenRouter && priceData.CacheCreationRatio != 1 { + cacheCreationTokens = CalcOpenRouterCacheCreateTokens(*usage, priceData) + promptTokens = promptTokens - cacheCreationTokens - cacheTokens + } + calculateQuota := 0.0 if !priceData.UsePrice { calculateQuota = float64(promptTokens) @@ -261,6 +267,27 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other) } +func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData helper.PriceData) int { + if priceData.CacheCreationRatio == 1 { + return 0 + } + quotaPrice := priceData.ModelRatio / common.QuotaPerUnit + promptCacheCreatePrice := quotaPrice * priceData.CacheCreationRatio + promptCacheReadPrice := quotaPrice * priceData.CacheRatio + completionPrice := quotaPrice * priceData.CompletionRatio + + cost := usage.Cost + totalPromptTokens := float64(usage.PromptTokens) + completionTokens := float64(usage.CompletionTokens) + promptCacheReadTokens := float64(usage.PromptTokensDetails.CachedTokens) + + return int(math.Round((cost - + totalPromptTokens*quotaPrice + + promptCacheReadTokens*(quotaPrice-promptCacheReadPrice) - + completionTokens*completionPrice) / + (promptCacheCreatePrice - quotaPrice))) +} + func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, preConsumedQuota int, userQuota int, priceData helper.PriceData, extraContent string) { From c4f25a77d1af97998f66f5cc7f1c3942994135ec Mon Sep 17 00:00:00 2001 From: neotf Date: Wed, 11 Jun 2025 13:56:44 +0800 Subject: [PATCH 3/7] format --- dto/openai_request.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dto/openai_request.go b/dto/openai_request.go index a51dffd8..50dee203 100644 --- a/dto/openai_request.go +++ b/dto/openai_request.go @@ -56,7 +56,7 @@ type GeneralOpenAIRequest struct { ExtraBody json.RawMessage `json:"extra_body,omitempty"` WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"` // OpenRouter Params - Usage json.RawMessage `json:"usage,omitempty"` + Usage json.RawMessage `json:"usage,omitempty"`  Reasoning json.RawMessage `json:"reasoning,omitempty"` } From d67d5d800671c9087e245383cab7c180a2b3c821 Mon Sep 17 00:00:00 2001 From: neotf Date: Wed, 11 Jun 2025 14:00:32 +0800 Subject: [PATCH 4/7] format --- dto/openai_request.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dto/openai_request.go b/dto/openai_request.go index 50dee203..10e10332 100644 --- a/dto/openai_request.go +++ b/dto/openai_request.go @@ -56,7 +56,7 @@ type GeneralOpenAIRequest struct { ExtraBody json.RawMessage `json:"extra_body,omitempty"` WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"` // OpenRouter Params - Usage json.RawMessage `json:"usage,omitempty"`  + Usage json.RawMessage `json:"usage,omitempty"` Reasoning json.RawMessage `json:"reasoning,omitempty"` } From a6363a502ad239610281fe078df8ec1158bfc461 Mon Sep 17 00:00:00 2001 From: neotf Date: Wed, 18 Jun 2025 15:29:19 +0800 Subject: [PATCH 5/7] Update relay/channel/openai/adaptor.go use review's suggestion Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- relay/channel/openai/adaptor.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index ea24d811..451ed408 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -159,9 +159,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure { request.StreamOptions = nil } - if info.ChannelType == common.ChannelTypeOpenRouter { - request.Usage = json.RawMessage("{\"include\": true}") - } +if info.ChannelType == common.ChannelTypeOpenRouter { + if len(request.Usage) == 0 { + request.Usage = json.RawMessage(`{"include":true}`) + } +} if strings.HasPrefix(request.Model, "o") { if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 { request.MaxCompletionTokens = request.MaxTokens From 37fbcb7950a122aadada77c0dfdffae928b16242 Mon Sep 17 00:00:00 2001 From: neotf <10400594+neotf@users.noreply.github.com> Date: Wed, 18 Jun 2025 19:54:20 +0800 Subject: [PATCH 6/7] format --- relay/channel/openai/adaptor.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go index 451ed408..424fd3df 100644 --- a/relay/channel/openai/adaptor.go +++ b/relay/channel/openai/adaptor.go @@ -159,11 +159,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure { request.StreamOptions = nil } -if info.ChannelType == common.ChannelTypeOpenRouter { - if len(request.Usage) == 0 { - request.Usage = json.RawMessage(`{"include":true}`) - } -} + if info.ChannelType == common.ChannelTypeOpenRouter { + if len(request.Usage) == 0 { + request.Usage = json.RawMessage(`{"include":true}`) + } + } if strings.HasPrefix(request.Model, "o") { if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 { request.MaxCompletionTokens = request.MaxTokens From 16c63b3be9a1935df7a5f0c24a238f0bd3aaa21c Mon Sep 17 00:00:00 2001 From: neotf <10400594+neotf@users.noreply.github.com> Date: Wed, 18 Jun 2025 20:11:48 +0800 Subject: [PATCH 7/7] fix(quota): refine cache token calculation for OpenRouter channel type --- service/quota.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/service/quota.go b/service/quota.go index 8c7ed07e..33cc65d7 100644 --- a/service/quota.go +++ b/service/quota.go @@ -232,9 +232,15 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, cacheCreationRatio := priceData.CacheCreationRatio cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens - if relayInfo.ChannelType == common.ChannelTypeOpenRouter && priceData.CacheCreationRatio != 1 { - cacheCreationTokens = CalcOpenRouterCacheCreateTokens(*usage, priceData) - promptTokens = promptTokens - cacheCreationTokens - cacheTokens + if relayInfo.ChannelType == common.ChannelTypeOpenRouter { + promptTokens -= cacheTokens + if cacheCreationTokens == 0 && priceData.CacheCreationRatio != 1 && usage.Cost != 0 { + maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, priceData) + if promptTokens >= maybeCacheCreationTokens { + cacheCreationTokens = maybeCacheCreationTokens + } + } + promptTokens -= cacheCreationTokens } calculateQuota := 0.0