From 9f61407bf05e489fb6631ea875b25979b2b44b71 Mon Sep 17 00:00:00 2001
From: Seefs <i@seefs.me>
Date: Wed, 25 Mar 2026 13:11:51 +0800
Subject: [PATCH 1/3] fix: restore pre-3400 OpenRouter billing semantics

---
 service/text_quota.go      |   8 ++-
 service/text_quota_test.go | 111 +++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/service/text_quota.go b/service/text_quota.go
index a300097e..6fe37997 100644
--- a/service/text_quota.go
+++ b/service/text_quota.go
@@ -113,8 +113,10 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 	summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
 	summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
 	legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
+	isOpenRouter := relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter
+	isOpenRouterClaudeBilling := isOpenRouter && summary.IsClaudeUsageSemantic
 
-	if relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter {
+	if isOpenRouterClaudeBilling {
 		summary.PromptTokens -= summary.CacheTokens
 		isUsingCustomSettings := relayInfo.PriceData.UsePrice || hasCustomModelRatio(summary.ModelName, relayInfo.PriceData.ModelRatio)
 		if summary.CacheCreationTokens == 0 && relayInfo.PriceData.CacheCreationRatio != 1 && usage.Cost != 0 && !isUsingCustomSettings {
@@ -197,7 +199,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 
 		var cachedTokensWithRatio decimal.Decimal
 		if !dCacheTokens.IsZero() {
-			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
+			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter {
 				baseTokens = baseTokens.Sub(dCacheTokens)
 			}
 			cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
@@ -206,7 +208,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 		var cachedCreationTokensWithRatio decimal.Decimal
 		hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0
 		if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens {
-			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
+			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter {
 				baseTokens = baseTokens.Sub(dCachedCreationTokens)
 				cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio)
 			} else {
diff --git a/service/text_quota_test.go b/service/text_quota_test.go
index 4370b16e..734eacf9 100644
--- a/service/text_quota_test.go
+++ b/service/text_quota_test.go
@@ -5,6 +5,7 @@ import (
 	"testing"
 	"time"
 
+	"github.com/QuantumNous/new-api/constant"
 	"github.com/QuantumNous/new-api/dto"
 	relaycommon "github.com/QuantumNous/new-api/relay/common"
 	"github.com/QuantumNous/new-api/types"
@@ -204,3 +205,113 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi
 	// 62 + 3544*0.1 + 586*1.25 + 95*5 = 1624.9 => 1624
 	require.Equal(t, 1624, summary.Quota)
 }
+
+func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheTokensFromPrompt(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	w := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(w)
+
+	relayInfo := &relaycommon.RelayInfo{
+		OriginModelName: "openai/gpt-4.1",
+		ChannelMeta: &relaycommon.ChannelMeta{
+			ChannelType: constant.ChannelTypeOpenRouter,
+		},
+		PriceData: types.PriceData{
+			ModelRatio:         1,
+			CompletionRatio:    1,
+			CacheRatio:         0.1,
+			CacheCreationRatio: 1.25,
+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
+		},
+		StartTime: time.Now(),
+	}
+
+	usage := &dto.Usage{
+		PromptTokens:     2604,
+		CompletionTokens: 383,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens: 2432,
+		},
+	}
+
+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
+
+	// OpenRouter usage is already normalized. prompt_tokens should stay intact.
+	// quota = 2604 + 2432*0.1 + 383 = 3230.2 => 3230
+	require.Equal(t, 2604, summary.PromptTokens)
+	require.Equal(t, 3230, summary.Quota)
+}
+
+func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheCreationTokensFromPrompt(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	w := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(w)
+
+	relayInfo := &relaycommon.RelayInfo{
+		OriginModelName: "openai/gpt-4.1",
+		ChannelMeta: &relaycommon.ChannelMeta{
+			ChannelType: constant.ChannelTypeOpenRouter,
+		},
+		PriceData: types.PriceData{
+			ModelRatio:         1,
+			CompletionRatio:    1,
+			CacheCreationRatio: 1.25,
+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
+		},
+		StartTime: time.Now(),
+	}
+
+	usage := &dto.Usage{
+		PromptTokens:     2604,
+		CompletionTokens: 383,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedCreationTokens: 100,
+		},
+	}
+
+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
+
+	// OpenRouter usage is already normalized. prompt_tokens should stay intact.
+	// quota = 2604 + 100*1.25 + 383 = 3112
+	require.Equal(t, 2604, summary.PromptTokens)
+	require.Equal(t, 3112, summary.Quota)
+}
+
+func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	w := httptest.NewRecorder()
+	ctx, _ := gin.CreateTestContext(w)
+
+	relayInfo := &relaycommon.RelayInfo{
+		FinalRequestRelayFormat: types.RelayFormatClaude,
+		OriginModelName:         "anthropic/claude-3.7-sonnet",
+		ChannelMeta: &relaycommon.ChannelMeta{
+			ChannelType: constant.ChannelTypeOpenRouter,
+		},
+		PriceData: types.PriceData{
+			ModelRatio:         1,
+			CompletionRatio:    1,
+			CacheRatio:         0.1,
+			CacheCreationRatio: 1.25,
+			GroupRatioInfo:     types.GroupRatioInfo{GroupRatio: 1},
+		},
+		StartTime: time.Now(),
+	}
+
+	usage := &dto.Usage{
+		PromptTokens:     2604,
+		CompletionTokens: 383,
+		PromptTokensDetails: dto.InputTokenDetails{
+			CachedTokens: 2432,
+		},
+	}
+
+	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
+
+	// Pre-PR PostClaudeConsumeQuota behavior for OpenRouter:
+	// prompt = 2604 - 2432 = 172
+	// quota = 172 + 2432*0.1 + 383 = 798.2 => 798
+	require.True(t, summary.IsClaudeUsageSemantic)
+	require.Equal(t, 172, summary.PromptTokens)
+	require.Equal(t, 798, summary.Quota)
+}

From d4a470a638e6ecbd7e5ab75351293f32622bc148 Mon Sep 17 00:00:00 2001
From: Seefs <i@seefs.me>
Date: Wed, 25 Mar 2026 13:24:52 +0800
Subject: [PATCH 2/3] fix: bill OpenRouter cache tokens separately from prompt tokens for non-Claude usage

---
 service/text_quota.go      |  9 +++++----
 service/text_quota_test.go | 17 +++++++++--------
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/service/text_quota.go b/service/text_quota.go
index 6fe37997..8caee8f2 100644
--- a/service/text_quota.go
+++ b/service/text_quota.go
@@ -113,8 +113,9 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 	summary.ImageTokens = usage.PromptTokensDetails.ImageTokens
 	summary.AudioTokens = usage.PromptTokensDetails.AudioTokens
 	legacyClaudeDerived := isLegacyClaudeDerivedOpenAIUsage(relayInfo, usage)
-	isOpenRouter := relayInfo.ChannelMeta != nil && relayInfo.ChannelType == constant.ChannelTypeOpenRouter
-	isOpenRouterClaudeBilling := isOpenRouter && summary.IsClaudeUsageSemantic
+	isOpenRouterClaudeBilling := relayInfo.ChannelMeta != nil &&
+		relayInfo.ChannelType == constant.ChannelTypeOpenRouter &&
+		summary.IsClaudeUsageSemantic
 
 	if isOpenRouterClaudeBilling {
 		summary.PromptTokens -= summary.CacheTokens
@@ -199,7 +200,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 
 		var cachedTokensWithRatio decimal.Decimal
 		if !dCacheTokens.IsZero() {
-			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter {
+			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
 				baseTokens = baseTokens.Sub(dCacheTokens)
 			}
 			cachedTokensWithRatio = dCacheTokens.Mul(dCacheRatio)
@@ -208,7 +209,7 @@ func calculateTextQuotaSummary(ctx *gin.Context, relayInfo *relaycommon.RelayInf
 		var cachedCreationTokensWithRatio decimal.Decimal
 		hasSplitCacheCreationTokens := summary.CacheCreationTokens5m > 0 || summary.CacheCreationTokens1h > 0
 		if !dCachedCreationTokens.IsZero() || hasSplitCacheCreationTokens {
-			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived && !isOpenRouter {
+			if !summary.IsClaudeUsageSemantic && !legacyClaudeDerived {
 				baseTokens = baseTokens.Sub(dCachedCreationTokens)
 				cachedCreationTokensWithRatio = dCachedCreationTokens.Mul(dCacheCreationRatio)
 			} else {
diff --git a/service/text_quota_test.go b/service/text_quota_test.go
index 734eacf9..e995de17 100644
--- a/service/text_quota_test.go
+++ b/service/text_quota_test.go
@@ -206,7 +206,7 @@ func TestCalculateTextQuotaSummaryHandlesLegacyClaudeDerivedOpenAIUsage(t *testi
 	require.Equal(t, 1624, summary.Quota)
 }
 
-func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheTokensFromPrompt(t *testing.T) {
+func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheReadFromPromptBilling(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	w := httptest.NewRecorder()
 	ctx, _ := gin.CreateTestContext(w)
@@ -236,13 +236,14 @@ func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheTokensFromPrompt
 
 	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
 
-	// OpenRouter usage is already normalized. prompt_tokens should stay intact.
-	// quota = 2604 + 2432*0.1 + 383 = 3230.2 => 3230
+	// OpenRouter OpenAI-format display keeps prompt_tokens as total input,
+	// but billing still separates normal input from cache read tokens.
+	// quota = (2604 - 2432) + 2432*0.1 + 383 = 798.2 => 798
 	require.Equal(t, 2604, summary.PromptTokens)
-	require.Equal(t, 3230, summary.Quota)
+	require.Equal(t, 798, summary.Quota)
 }
 
-func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheCreationTokensFromPrompt(t *testing.T) {
+func TestCalculateTextQuotaSummarySeparatesOpenRouterCacheCreationFromPromptBilling(t *testing.T) {
 	gin.SetMode(gin.TestMode)
 	w := httptest.NewRecorder()
 	ctx, _ := gin.CreateTestContext(w)
@@ -271,10 +272,10 @@ func TestCalculateTextQuotaSummaryDoesNotSubtractOpenRouterCacheCreationTokensFr
 
 	summary := calculateTextQuotaSummary(ctx, relayInfo, usage)
 
-	// OpenRouter usage is already normalized. prompt_tokens should stay intact.
-	// quota = 2604 + 100*1.25 + 383 = 3112
+	// prompt_tokens is still logged as total input, but cache creation is billed separately.
+	// quota = (2604 - 100) + 100*1.25 + 383 = 3012
 	require.Equal(t, 2604, summary.PromptTokens)
-	require.Equal(t, 3112, summary.Quota)
+	require.Equal(t, 3012, summary.Quota)
 }
 
 func TestCalculateTextQuotaSummaryKeepsPrePRClaudeOpenRouterBilling(t *testing.T) {

From 926e1781dd87bd249218cd60c985c89ef4e48309 Mon Sep 17 00:00:00 2001
From: Seefs <i@seefs.me>
Date: Wed, 25 Mar 2026 13:49:21 +0800
Subject: [PATCH 3/3] fix: preserve cache usage in openai-to-claude response
 conversion

---
 service/convert.go | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/service/convert.go b/service/convert.go
index 7b5f3946..59d4f8fe 100644
--- a/service/convert.go
+++ b/service/convert.go
@@ -616,10 +616,7 @@ func ResponseOpenAI2Claude(openAIResponse *dto.OpenAITextResponse, info *relayco
 	}
 	claudeResponse.Content = contents
 	claudeResponse.StopReason = stopReason
-	claudeResponse.Usage = &dto.ClaudeUsage{
-		InputTokens:  openAIResponse.PromptTokens,
-		OutputTokens: openAIResponse.CompletionTokens,
-	}
+	claudeResponse.Usage = buildClaudeUsageFromOpenAIUsage(&openAIResponse.Usage)
 
 	return claudeResponse
 }