From 66778efcc5599191c200ee723f49ba0be2af5b83 Mon Sep 17 00:00:00 2001
From: neotf <10400594+neotf@users.noreply.github.com>
Date: Thu, 29 May 2025 00:49:21 +0800
Subject: [PATCH 1/7] feat: enhance token usage details for upstream OpenRouter

---
 dto/openai_request.go           | 76 +++++++++++++++++----------------
 relay/channel/openai/adaptor.go |  3 ++
 service/convert.go              |  9 ++--
 3 files changed, 48 insertions(+), 40 deletions(-)

diff --git a/dto/openai_request.go b/dto/openai_request.go
index bda1bb17..9e3a41ac 100644
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -18,43 +18,45 @@ type FormatJsonSchema struct {
 }
 
 type GeneralOpenAIRequest struct {
-	Model               string         `json:"model,omitempty"`
-	Messages            []Message      `json:"messages,omitempty"`
-	Prompt              any            `json:"prompt,omitempty"`
-	Prefix              any            `json:"prefix,omitempty"`
-	Suffix              any            `json:"suffix,omitempty"`
-	Stream              bool           `json:"stream,omitempty"`
-	StreamOptions       *StreamOptions `json:"stream_options,omitempty"`
-	MaxTokens           uint           `json:"max_tokens,omitempty"`
-	MaxCompletionTokens uint           `json:"max_completion_tokens,omitempty"`
-	ReasoningEffort     string         `json:"reasoning_effort,omitempty"`
-	//Reasoning           json.RawMessage   `json:"reasoning,omitempty"`
-	Temperature      *float64          `json:"temperature,omitempty"`
-	TopP             float64           `json:"top_p,omitempty"`
-	TopK             int               `json:"top_k,omitempty"`
-	Stop             any               `json:"stop,omitempty"`
-	N                int               `json:"n,omitempty"`
-	Input            any               `json:"input,omitempty"`
-	Instruction      string            `json:"instruction,omitempty"`
-	Size             string            `json:"size,omitempty"`
-	Functions        any               `json:"functions,omitempty"`
-	FrequencyPenalty float64           `json:"frequency_penalty,omitempty"`
-	PresencePenalty  float64           `json:"presence_penalty,omitempty"`
-	ResponseFormat   *ResponseFormat   `json:"response_format,omitempty"`
-	EncodingFormat   any               `json:"encoding_format,omitempty"`
-	Seed             float64           `json:"seed,omitempty"`
-	ParallelTooCalls *bool             `json:"parallel_tool_calls,omitempty"`
-	Tools            []ToolCallRequest `json:"tools,omitempty"`
-	ToolChoice       any               `json:"tool_choice,omitempty"`
-	User             string            `json:"user,omitempty"`
-	LogProbs         bool              `json:"logprobs,omitempty"`
-	TopLogProbs      int               `json:"top_logprobs,omitempty"`
-	Dimensions       int               `json:"dimensions,omitempty"`
-	Modalities       any               `json:"modalities,omitempty"`
-	Audio            any               `json:"audio,omitempty"`
-	EnableThinking   any               `json:"enable_thinking,omitempty"` // ali
-	ExtraBody        any               `json:"extra_body,omitempty"`
-	WebSearchOptions *WebSearchOptions `json:"web_search_options,omitempty"`
+	Model               string            `json:"model,omitempty"`
+	Messages            []Message         `json:"messages,omitempty"`
+	Prompt              any               `json:"prompt,omitempty"`
+	Prefix              any               `json:"prefix,omitempty"`
+	Suffix              any               `json:"suffix,omitempty"`
+	Stream              bool              `json:"stream,omitempty"`
+	StreamOptions       *StreamOptions    `json:"stream_options,omitempty"`
+	MaxTokens           uint              `json:"max_tokens,omitempty"`
+	MaxCompletionTokens uint              `json:"max_completion_tokens,omitempty"`
+	ReasoningEffort     string            `json:"reasoning_effort,omitempty"`
+	Temperature         *float64          `json:"temperature,omitempty"`
+	TopP                float64           `json:"top_p,omitempty"`
+	TopK                int               `json:"top_k,omitempty"`
+	Stop                any               `json:"stop,omitempty"`
+	N                   int               `json:"n,omitempty"`
+	Input               any               `json:"input,omitempty"`
+	Instruction         string            `json:"instruction,omitempty"`
+	Size                string            `json:"size,omitempty"`
+	Functions           any               `json:"functions,omitempty"`
+	FrequencyPenalty    float64           `json:"frequency_penalty,omitempty"`
+	PresencePenalty     float64           `json:"presence_penalty,omitempty"`
+	ResponseFormat      *ResponseFormat   `json:"response_format,omitempty"`
+	EncodingFormat      any               `json:"encoding_format,omitempty"`
+	Seed                float64           `json:"seed,omitempty"`
+	ParallelTooCalls    *bool             `json:"parallel_tool_calls,omitempty"`
+	Tools               []ToolCallRequest `json:"tools,omitempty"`
+	ToolChoice          any               `json:"tool_choice,omitempty"`
+	User                string            `json:"user,omitempty"`
+	LogProbs            bool              `json:"logprobs,omitempty"`
+	TopLogProbs         int               `json:"top_logprobs,omitempty"`
+	Dimensions          int               `json:"dimensions,omitempty"`
+	Modalities          any               `json:"modalities,omitempty"`
+	Audio               any               `json:"audio,omitempty"`
+	EnableThinking      any               `json:"enable_thinking,omitempty"` // ali
+	ExtraBody           any               `json:"extra_body,omitempty"`
+	WebSearchOptions    *WebSearchOptions `json:"web_search_options,omitempty"`
+	// OpenRouter Params
+	Usage     json.RawMessage `json:"usage,omitempty"`
+	Reasoning json.RawMessage `json:"reasoning,omitempty"`
 }
 
 type ToolCallRequest struct {
diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go
index f0cf073f..cef958b2 100644
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -152,6 +152,9 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure {
 		request.StreamOptions = nil
 	}
+	if info.ChannelType == common.ChannelTypeOpenRouter {
+		request.Usage = json.RawMessage("{\"include\": true}")
+	}
 	if strings.HasPrefix(request.Model, "o") {
 		if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 {
 			request.MaxCompletionTokens = request.MaxTokens
diff --git a/service/convert.go b/service/convert.go
index cc462b40..67e77903 100644
--- a/service/convert.go
+++ b/service/convert.go
@@ -246,12 +246,15 @@ func StreamResponseOpenAI2Claude(openAIResponse *dto.ChatCompletionsStreamRespon
 		}
 		if info.Done {
 			claudeResponses = append(claudeResponses, generateStopBlock(info.ClaudeConvertInfo.Index))
-			if info.ClaudeConvertInfo.Usage != nil {
+			oaiUsage := info.ClaudeConvertInfo.Usage
+			if oaiUsage != nil {
 				claudeResponses = append(claudeResponses, &dto.ClaudeResponse{
 					Type: "message_delta",
 					Usage: &dto.ClaudeUsage{
-						InputTokens:  info.ClaudeConvertInfo.Usage.PromptTokens,
-						OutputTokens: info.ClaudeConvertInfo.Usage.CompletionTokens,
+						InputTokens:              oaiUsage.PromptTokens,
+						OutputTokens:             oaiUsage.CompletionTokens,
+						CacheCreationInputTokens: oaiUsage.PromptTokensDetails.CachedCreationTokens,
+						CacheReadInputTokens:     oaiUsage.PromptTokensDetails.CachedTokens,
 					},
 					Delta: &dto.ClaudeMediaMessage{
 						StopReason: common.GetPointer[string](stopReasonOpenAI2Claude(info.FinishReason)),

From 3d9587f128a20b786c464a9f77ace143f4f426d8 Mon Sep 17 00:00:00 2001
From: neotf <10400594+neotf@users.noreply.github.com>
Date: Thu, 29 May 2025 22:24:29 +0800
Subject: [PATCH 2/7] feat: enhance cache_create_tokens calculation for
 OpenRouter

---
 dto/openai_response.go |  2 ++
 service/quota.go       | 27 +++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/dto/openai_response.go b/dto/openai_response.go
index 790d4df8..fb4aeb4c 100644
--- a/dto/openai_response.go
+++ b/dto/openai_response.go
@@ -178,6 +178,8 @@ type Usage struct {
 	InputTokens            int                `json:"input_tokens"`
 	OutputTokens           int                `json:"output_tokens"`
 	InputTokensDetails     *InputTokenDetails `json:"input_tokens_details"`
+	// OpenRouter Params
+	Cost float64 `json:"cost,omitempty"`
 }
 
 type InputTokenDetails struct {
diff --git a/service/quota.go b/service/quota.go
index 0d11b4a0..43297b4a 100644
--- a/service/quota.go
+++ b/service/quota.go
@@ -3,6 +3,7 @@ package service
 import (
 	"errors"
 	"fmt"
+	"math"
 	"one-api/common"
 	constant2 "one-api/constant"
 	"one-api/dto"
@@ -214,6 +215,11 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	cacheCreationRatio := priceData.CacheCreationRatio
 	cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
 
+	if relayInfo.ChannelType == common.ChannelTypeOpenRouter && priceData.CacheCreationRatio != 1 {
+		cacheCreationTokens = CalcOpenRouterCacheCreateTokens(*usage, priceData)
+		promptTokens = promptTokens - cacheCreationTokens - cacheTokens
+	}
+
 	calculateQuota := 0.0
 	if !priceData.UsePrice {
 		calculateQuota = float64(promptTokens)
@@ -261,6 +267,27 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 		tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
 }
 
+// CalcOpenRouterCacheCreateTokens estimates the cache-creation prompt tokens
+// implied by usage.Cost, returning 0 when no usable estimate can be derived.
+func CalcOpenRouterCacheCreateTokens(usage dto.Usage, priceData helper.PriceData) int {
+	quotaPrice := priceData.ModelRatio / common.QuotaPerUnit
+	// Per-token surcharge for a cache write; 0 when CacheCreationRatio is 1 or ModelRatio is 0.
+	surcharge := quotaPrice*priceData.CacheCreationRatio - quotaPrice
+	if surcharge == 0 {
+		return 0
+	}
+	completionPrice := quotaPrice * priceData.CompletionRatio
+	// Cost left after pricing every prompt token as uncached and crediting cache reads.
+	residual := usage.Cost - float64(usage.PromptTokens)*quotaPrice +
+		float64(usage.PromptTokensDetails.CachedTokens)*(quotaPrice-quotaPrice*priceData.CacheRatio) -
+		float64(usage.CompletionTokens)*completionPrice
+	// Converting NaN, ±Inf or an out-of-range float to int is implementation-defined,
+	// and a negative count would inflate the caller's promptTokens, so reject those.
+	if tokens := math.Round(residual / surcharge); tokens > 0 && tokens <= math.MaxInt32 {
+		return int(tokens)
+	}
+	return 0
+}
+
 func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	usage *dto.Usage, preConsumedQuota int, userQuota int, priceData helper.PriceData, extraContent string) {
 

From c4f25a77d1af97998f66f5cc7f1c3942994135ec Mon Sep 17 00:00:00 2001
From: neotf <neotf@users.noreply.github.com>
Date: Wed, 11 Jun 2025 13:56:44 +0800
Subject: [PATCH 3/7] style: indent Usage field with a tab

---
 dto/openai_request.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dto/openai_request.go b/dto/openai_request.go
index a51dffd8..50dee203 100644
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -56,7 +56,7 @@ type GeneralOpenAIRequest struct {
 	ExtraBody           json.RawMessage   `json:"extra_body,omitempty"`
 	WebSearchOptions    *WebSearchOptions `json:"web_search_options,omitempty"`
 	// OpenRouter Params
-  Usage     json.RawMessage `json:"usage,omitempty"`
+	Usage     json.RawMessage `json:"usage,omitempty"`  
 	Reasoning json.RawMessage `json:"reasoning,omitempty"`
 }
 

From d67d5d800671c9087e245383cab7c180a2b3c821 Mon Sep 17 00:00:00 2001
From: neotf <neotf@users.noreply.github.com>
Date: Wed, 11 Jun 2025 14:00:32 +0800
Subject: [PATCH 4/7] style: remove trailing whitespace after Usage field

---
 dto/openai_request.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dto/openai_request.go b/dto/openai_request.go
index 50dee203..10e10332 100644
--- a/dto/openai_request.go
+++ b/dto/openai_request.go
@@ -56,7 +56,7 @@ type GeneralOpenAIRequest struct {
 	ExtraBody           json.RawMessage   `json:"extra_body,omitempty"`
 	WebSearchOptions    *WebSearchOptions `json:"web_search_options,omitempty"`
 	// OpenRouter Params
-	Usage     json.RawMessage `json:"usage,omitempty"`  
+	Usage     json.RawMessage `json:"usage,omitempty"`
 	Reasoning json.RawMessage `json:"reasoning,omitempty"`
 }
 

From a6363a502ad239610281fe078df8ec1158bfc461 Mon Sep 17 00:00:00 2001
From: neotf <neotf@users.noreply.github.com>
Date: Wed, 18 Jun 2025 15:29:19 +0800
Subject: [PATCH 5/7] Update relay/channel/openai/adaptor.go

Apply the reviewer's suggestion.

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 relay/channel/openai/adaptor.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go
index ea24d811..451ed408 100644
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -159,9 +159,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure {
 		request.StreamOptions = nil
 	}
-	if info.ChannelType == common.ChannelTypeOpenRouter {
-		request.Usage = json.RawMessage("{\"include\": true}")
-	}
+if info.ChannelType == common.ChannelTypeOpenRouter {
+    if len(request.Usage) == 0 {
+        request.Usage = json.RawMessage(`{"include":true}`)
+    }
+}
 	if strings.HasPrefix(request.Model, "o") {
 		if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 {
 			request.MaxCompletionTokens = request.MaxTokens

From 37fbcb7950a122aadada77c0dfdffae928b16242 Mon Sep 17 00:00:00 2001
From: neotf <10400594+neotf@users.noreply.github.com>
Date: Wed, 18 Jun 2025 19:54:20 +0800
Subject: [PATCH 6/7] style: gofmt the OpenRouter usage block

---
 relay/channel/openai/adaptor.go | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/relay/channel/openai/adaptor.go b/relay/channel/openai/adaptor.go
index 451ed408..424fd3df 100644
--- a/relay/channel/openai/adaptor.go
+++ b/relay/channel/openai/adaptor.go
@@ -159,11 +159,11 @@ func (a *Adaptor) ConvertOpenAIRequest(c *gin.Context, info *relaycommon.RelayIn
 	if info.ChannelType != common.ChannelTypeOpenAI && info.ChannelType != common.ChannelTypeAzure {
 		request.StreamOptions = nil
 	}
-if info.ChannelType == common.ChannelTypeOpenRouter {
-    if len(request.Usage) == 0 {
-        request.Usage = json.RawMessage(`{"include":true}`)
-    }
-}
+	if info.ChannelType == common.ChannelTypeOpenRouter {
+		if len(request.Usage) == 0 {
+			request.Usage = json.RawMessage(`{"include":true}`)
+		}
+	}
 	if strings.HasPrefix(request.Model, "o") {
 		if request.MaxCompletionTokens == 0 && request.MaxTokens != 0 {
 			request.MaxCompletionTokens = request.MaxTokens

From 16c63b3be9a1935df7a5f0c24a238f0bd3aaa21c Mon Sep 17 00:00:00 2001
From: neotf <10400594+neotf@users.noreply.github.com>
Date: Wed, 18 Jun 2025 20:11:48 +0800
Subject: [PATCH 7/7] fix(quota): refine cache token calculation for OpenRouter
 channel type

---
 service/quota.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/service/quota.go b/service/quota.go
index 8c7ed07e..33cc65d7 100644
--- a/service/quota.go
+++ b/service/quota.go
@@ -232,9 +232,15 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
 	cacheCreationRatio := priceData.CacheCreationRatio
 	cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
 
-	if relayInfo.ChannelType == common.ChannelTypeOpenRouter && priceData.CacheCreationRatio != 1 {
-		cacheCreationTokens = CalcOpenRouterCacheCreateTokens(*usage, priceData)
-		promptTokens = promptTokens - cacheCreationTokens - cacheTokens
+	if relayInfo.ChannelType == common.ChannelTypeOpenRouter {
+		promptTokens -= cacheTokens
+		if cacheCreationTokens == 0 && priceData.CacheCreationRatio != 1 && usage.Cost != 0 {
+			maybeCacheCreationTokens := CalcOpenRouterCacheCreateTokens(*usage, priceData)
+			if promptTokens >= maybeCacheCreationTokens {
+				cacheCreationTokens = maybeCacheCreationTokens
+			}
+		}
+		promptTokens -= cacheCreationTokens
 	}
 
 	calculateQuota := 0.0