fix(openai): 修复 gpt-5.4 长上下文计费与快照白名单

补齐 gpt-5.4 fallback 的长上下文计费元信息，确保超过 272000 输入 token 时对整次会话应用 2x 输入与 1.5x 输出计费规则。

同时将官方快照 gpt-5.4-2026-03-05 加入前端白名单候选与回归测试，避免 whitelist 模式误拦截。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
(cherry picked from commit d95497af87f608c6dadcbe7d6e851de9413ae147)
2026-03-06 09:04:58 +08:00
parent 1a0d4ed668
commit f366026435
6 changed files with 137 additions and 31 deletions
--- a/backend/internal/service/billing_service.go
+++ b/backend/internal/service/billing_service.go
@@ -43,15 +43,24 @@ type BillingCache interface {
 // ModelPricing 模型价格配置（per-token价格，与LiteLLM格式一致）
 type ModelPricing struct {
-	InputPricePerToken         float64 // 每token输入价格 (USD)
+	InputPricePerToken          float64 // 每token输入价格 (USD)
-	OutputPricePerToken        float64 // 每token输出价格 (USD)
+	OutputPricePerToken         float64 // 每token输出价格 (USD)
-	CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
+	CacheCreationPricePerToken  float64 // 缓存创建每token价格 (USD)
-	CacheReadPricePerToken     float64 // 缓存读取每token价格 (USD)
+	CacheReadPricePerToken      float64 // 缓存读取每token价格 (USD)
-	CacheCreation5mPrice       float64 // 5分钟缓存创建每token价格 (USD)
+	CacheCreation5mPrice        float64 // 5分钟缓存创建每token价格 (USD)
-	CacheCreation1hPrice       float64 // 1小时缓存创建每token价格 (USD)
+	CacheCreation1hPrice        float64 // 1小时缓存创建每token价格 (USD)
-	SupportsCacheBreakdown     bool    // 是否支持详细的缓存分类
+	SupportsCacheBreakdown      bool    // 是否支持详细的缓存分类
 	LongContextInputThreshold   int     // 超过阈值后按整次会话提升输入价格
 	LongContextInputMultiplier  float64 // 长上下文整次会话输入倍率
 	LongContextOutputMultiplier float64 // 长上下文整次会话输出倍率
 }
 // Default long-context pricing policy for OpenAI GPT-5.4. These values seed the
 // gpt-5.4 fallback price entry and fill in any long-context field that a
 // GPT-5.4 pricing record leaves unset.
 const (
 	openAIGPT54LongContextInputThreshold   = 272000 // input-token count above which the multipliers apply
 	openAIGPT54LongContextInputMultiplier  = 2.0    // multiplier applied to the per-token input price
 	openAIGPT54LongContextOutputMultiplier = 1.5    // multiplier applied to the per-token output price
 )
 // UsageTokens 使用的token数量
 type UsageTokens struct {
 	InputTokens           int
@@ -172,11 +181,14 @@ func (s *BillingService) initFallbackPricing() {
 	}
 	// OpenAI GPT-5.4（业务指定价格）
 	s.fallbackPrices["gpt-5.4"] = &ModelPricing{
-		InputPricePerToken:         2.5e-6,  // $2.5 per MTok
+		InputPricePerToken:          2.5e-6,  // $2.5 per MTok
-		OutputPricePerToken:        15e-6,   // $15 per MTok
+		OutputPricePerToken:         15e-6,   // $15 per MTok
-		CacheCreationPricePerToken: 2.5e-6,  // $2.5 per MTok
+		CacheCreationPricePerToken:  2.5e-6,  // $2.5 per MTok
-		CacheReadPricePerToken:     0.25e-6, // $0.25 per MTok
+		CacheReadPricePerToken:      0.25e-6, // $0.25 per MTok
-		SupportsCacheBreakdown:     false,
+		SupportsCacheBreakdown:      false,
 		LongContextInputThreshold:   openAIGPT54LongContextInputThreshold,
 		LongContextInputMultiplier:  openAIGPT54LongContextInputMultiplier,
 		LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
 	}
 	// Codex 族兜底统一按 GPT-5.1 Codex 价格计费
 	s.fallbackPrices["gpt-5.1-codex"] = &ModelPricing{
@@ -256,15 +268,18 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
 			price5m := litellmPricing.CacheCreationInputTokenCost
 			price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
 			enableBreakdown := price1h > 0 && price1h > price5m
-			return &ModelPricing{
+			return s.applyModelSpecificPricingPolicy(model, &ModelPricing{
-				InputPricePerToken:         litellmPricing.InputCostPerToken,
+				InputPricePerToken:          litellmPricing.InputCostPerToken,
-				OutputPricePerToken:        litellmPricing.OutputCostPerToken,
+				OutputPricePerToken:         litellmPricing.OutputCostPerToken,
-				CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
+				CacheCreationPricePerToken:  litellmPricing.CacheCreationInputTokenCost,
-				CacheReadPricePerToken:     litellmPricing.CacheReadInputTokenCost,
+				CacheReadPricePerToken:      litellmPricing.CacheReadInputTokenCost,
-				CacheCreation5mPrice:       price5m,
+				CacheCreation5mPrice:        price5m,
-				CacheCreation1hPrice:       price1h,
+				CacheCreation1hPrice:        price1h,
-				SupportsCacheBreakdown:     enableBreakdown,
+				SupportsCacheBreakdown:      enableBreakdown,
-			}, nil
+				LongContextInputThreshold:   litellmPricing.LongContextInputTokenThreshold,
 				LongContextInputMultiplier:  litellmPricing.LongContextInputCostMultiplier,
 				LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
 			}), nil
 		}
 	}
@@ -272,7 +287,7 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
 	fallback := s.getFallbackPricing(model)
 	if fallback != nil {
 		log.Printf("[Billing] Using fallback pricing for model: %s", model)
-		return fallback, nil
+		return s.applyModelSpecificPricingPolicy(model, fallback), nil
 	}
 	return nil, fmt.Errorf("pricing not found for model: %s", model)
@@ -286,12 +301,18 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
 	}
 	breakdown := &CostBreakdown{}
 	inputPricePerToken := pricing.InputPricePerToken
 	outputPricePerToken := pricing.OutputPricePerToken
 	if s.shouldApplySessionLongContextPricing(tokens, pricing) {
 		inputPricePerToken *= pricing.LongContextInputMultiplier
 		outputPricePerToken *= pricing.LongContextOutputMultiplier
 	}
 	// 计算输入token费用（使用per-token价格）
-	breakdown.InputCost = float64(tokens.InputTokens) * pricing.InputPricePerToken
+	breakdown.InputCost = float64(tokens.InputTokens) * inputPricePerToken
 	// 计算输出token费用
-	breakdown.OutputCost = float64(tokens.OutputTokens) * pricing.OutputPricePerToken
+	breakdown.OutputCost = float64(tokens.OutputTokens) * outputPricePerToken
 	// 计算缓存费用
 	if pricing.SupportsCacheBreakdown && (pricing.CacheCreation5mPrice > 0 || pricing.CacheCreation1hPrice > 0) {
@@ -323,6 +344,45 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
 	return breakdown, nil
 }
 // applyModelSpecificPricingPolicy returns pricing with the GPT-5.4 long-context
 // defaults filled in for any long-context field that is not positive.
 //
 // A nil pricing yields nil. Pricing for models other than GPT-5.4, and GPT-5.4
 // pricing that already has all three long-context fields set, is returned
 // unchanged. Otherwise a copy is patched and returned, so the caller's struct
 // is never mutated.
 func (s *BillingService) applyModelSpecificPricingPolicy(model string, pricing *ModelPricing) *ModelPricing {
 	switch {
 	case pricing == nil:
 		return nil
 	case !isOpenAIGPT54Model(model):
 		return pricing
 	}
 	alreadyComplete := pricing.LongContextInputThreshold > 0 &&
 		pricing.LongContextInputMultiplier > 0 &&
 		pricing.LongContextOutputMultiplier > 0
 	if alreadyComplete {
 		return pricing
 	}
 	// Patch a copy so that the shared source entry stays untouched.
 	patched := *pricing
 	if patched.LongContextInputThreshold <= 0 {
 		patched.LongContextInputThreshold = openAIGPT54LongContextInputThreshold
 	}
 	if patched.LongContextInputMultiplier <= 0 {
 		patched.LongContextInputMultiplier = openAIGPT54LongContextInputMultiplier
 	}
 	if patched.LongContextOutputMultiplier <= 0 {
 		patched.LongContextOutputMultiplier = openAIGPT54LongContextOutputMultiplier
 	}
 	return &patched
 }
 // shouldApplySessionLongContextPricing reports whether the long-context
 // multipliers in pricing apply to this usage.
 //
 // It returns false when pricing is nil, when no positive threshold is
 // configured, or when neither multiplier exceeds 1. Otherwise it returns true
 // only if input tokens plus cache-read tokens are strictly above the threshold.
 //
 // NOTE(review): a pricing record with one multiplier > 1 and the other left at
 // 0 still passes this check; CalculateCost multiplies both prices
 // unconditionally, which would zero the unset side. Confirm such records
 // cannot reach this point.
 func (s *BillingService) shouldApplySessionLongContextPricing(tokens UsageTokens, pricing *ModelPricing) bool {
 	if pricing == nil {
 		return false
 	}
 	threshold := pricing.LongContextInputThreshold
 	if threshold <= 0 {
 		return false
 	}
 	noSurcharge := pricing.LongContextInputMultiplier <= 1 && pricing.LongContextOutputMultiplier <= 1
 	if noSurcharge {
 		return false
 	}
 	return tokens.InputTokens+tokens.CacheReadTokens > threshold
 }
 // isOpenAIGPT54Model reports whether model, after lowercasing, trimming
 // surrounding whitespace, and passing through normalizeCodexModel, is exactly
 // "gpt-5.4".
 func isOpenAIGPT54Model(model string) bool {
 	lowered := strings.ToLower(model)
 	trimmed := strings.TrimSpace(lowered)
 	return normalizeCodexModel(trimmed) == "gpt-5.4"
 }
 // CalculateCostWithConfig 使用配置中的默认倍率计算费用
 func (s *BillingService) CalculateCostWithConfig(model string, tokens UsageTokens) (*CostBreakdown, error) {
 	multiplier := s.cfg.Default.RateMultiplier
--- a/backend/internal/service/billing_service_test.go
+++ b/backend/internal/service/billing_service_test.go
@@ -169,6 +169,28 @@ func TestGetModelPricing_OpenAIGPT54Fallback(t *testing.T) {
 	require.InDelta(t, 2.5e-6, pricing.InputPricePerToken, 1e-12)
 	require.InDelta(t, 15e-6, pricing.OutputPricePerToken, 1e-12)
 	require.InDelta(t, 0.25e-6, pricing.CacheReadPricePerToken, 1e-12)
 	require.Equal(t, 272000, pricing.LongContextInputThreshold)
 	require.InDelta(t, 2.0, pricing.LongContextInputMultiplier, 1e-12)
 	require.InDelta(t, 1.5, pricing.LongContextOutputMultiplier, 1e-12)
 }
 // TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers checks
 // that a GPT-5.4 snapshot request with 300000 input tokens (above the 272000
 // threshold) is billed at 2x the input price and 1.5x the output price for
 // every token in the request.
 func TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers(t *testing.T) {
 	billing := newTestBillingService()
 	usage := UsageTokens{
 		InputTokens:  300000,
 		OutputTokens: 4000,
 	}
 	got, err := billing.CalculateCost("gpt-5.4-2026-03-05", usage, 1.0)
 	require.NoError(t, err)
 	// Base prices are $2.5/MTok input and $15/MTok output before the multipliers.
 	wantInput := float64(usage.InputTokens) * 2.5e-6 * 2.0
 	wantOutput := float64(usage.OutputTokens) * 15e-6 * 1.5
 	wantTotal := wantInput + wantOutput
 	require.InDelta(t, wantInput, got.InputCost, 1e-10)
 	require.InDelta(t, wantOutput, got.OutputCost, 1e-10)
 	require.InDelta(t, wantTotal, got.TotalCost, 1e-10)
 	// With a rate multiplier of 1.0 the actual cost equals the total cost.
 	require.InDelta(t, wantTotal, got.ActualCost, 1e-10)
 }
 func TestGetFallbackPricing_FamilyMatching(t *testing.T) {
--- a/backend/internal/service/pricing_service.go
+++ b/backend/internal/service/pricing_service.go
@@ -24,12 +24,15 @@ var (
 	openAIModelDatePattern     = regexp.MustCompile(`-\d{8}$`)
 	openAIModelBasePattern     = regexp.MustCompile(`^(gpt-\d+(?:\.\d+)?)(?:-|$)`)
 	openAIGPT54FallbackPricing = &LiteLLMModelPricing{
-		InputCostPerToken:       2.5e-06, // $2.5 per MTok
+		InputCostPerToken:               2.5e-06, // $2.5 per MTok
-		OutputCostPerToken:      1.5e-05, // $15 per MTok
+		OutputCostPerToken:              1.5e-05, // $15 per MTok
-		CacheReadInputTokenCost: 2.5e-07, // $0.25 per MTok
+		CacheReadInputTokenCost:         2.5e-07, // $0.25 per MTok
-		LiteLLMProvider:         "openai",
+		LongContextInputTokenThreshold:  272000,
-		Mode:                    "chat",
+		LongContextInputCostMultiplier:  2.0,
-		SupportsPromptCaching:   true,
+		LongContextOutputCostMultiplier: 1.5,
 		LiteLLMProvider:                 "openai",
 		Mode:                            "chat",
 		SupportsPromptCaching:           true,
 	}
 )
@@ -41,6 +44,9 @@ type LiteLLMModelPricing struct {
 	CacheCreationInputTokenCost         float64 `json:"cache_creation_input_token_cost"`
 	CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
 	CacheReadInputTokenCost             float64 `json:"cache_read_input_token_cost"`
 	LongContextInputTokenThreshold      int     `json:"long_context_input_token_threshold,omitempty"`
 	LongContextInputCostMultiplier      float64 `json:"long_context_input_cost_multiplier,omitempty"`
 	LongContextOutputCostMultiplier     float64 `json:"long_context_output_cost_multiplier,omitempty"`
 	LiteLLMProvider                     string  `json:"litellm_provider"`
 	Mode                                string  `json:"mode"`
 	SupportsPromptCaching               bool    `json:"supports_prompt_caching"`
--- a/backend/internal/service/pricing_service_test.go
+++ b/backend/internal/service/pricing_service_test.go
@@ -64,4 +64,7 @@ func TestGetModelPricing_Gpt54UsesStaticFallbackWhenRemoteMissing(t *testing.T)
 	require.InDelta(t, 2.5e-6, got.InputCostPerToken, 1e-12)
 	require.InDelta(t, 1.5e-5, got.OutputCostPerToken, 1e-12)
 	require.InDelta(t, 2.5e-7, got.CacheReadInputTokenCost, 1e-12)
 	require.Equal(t, 272000, got.LongContextInputTokenThreshold)
 	require.InDelta(t, 2.0, got.LongContextInputCostMultiplier, 1e-12)
 	require.InDelta(t, 1.5, got.LongContextOutputCostMultiplier, 1e-12)
 }
--- a/frontend/src/composables/tests/useModelWhitelist.spec.ts
+++ b/frontend/src/composables/tests/useModelWhitelist.spec.ts
@@ -2,6 +2,13 @@ import { describe, expect, it } from 'vitest'
 import { buildModelMappingObject, getModelsByPlatform } from '../useModelWhitelist'
 describe('useModelWhitelist', () => {
  it('openai 模型列表包含 GPT-5.4 官方快照', () => {
    // Both the base model and its dated official snapshot must be offered.
    const openaiModelList = getModelsByPlatform('openai')
    for (const expectedModel of ['gpt-5.4', 'gpt-5.4-2026-03-05']) {
      expect(openaiModelList).toContain(expectedModel)
    }
  })
  it('antigravity 模型列表包含图片模型兼容项', () => {
    const models = getModelsByPlatform('antigravity')
@@ -15,4 +22,12 @@ describe('useModelWhitelist', () => {
      'gemini-3.1-flash-image': 'gemini-3.1-flash-image'
    })
  })
  it('whitelist 模式会保留 GPT-5.4 官方快照的精确映射', () => {
    // In whitelist mode the snapshot id must map to itself, unchanged.
    const snapshot = 'gpt-5.4-2026-03-05'
    const result = buildModelMappingObject('whitelist', [snapshot], [])
    expect(result).toEqual({ [snapshot]: snapshot })
  })
 })
--- a/frontend/src/composables/useModelWhitelist.ts
+++ b/frontend/src/composables/useModelWhitelist.ts
@@ -25,7 +25,7 @@ const openaiModels = [
  'gpt-5.2', 'gpt-5.2-2025-12-11', 'gpt-5.2-chat-latest',
  'gpt-5.2-codex', 'gpt-5.2-pro', 'gpt-5.2-pro-2025-12-11',
  // GPT-5.4 系列
-  'gpt-5.4',
+  'gpt-5.4', 'gpt-5.4-2026-03-05',
  // GPT-5.3 系列
  'gpt-5.3-codex', 'gpt-5.3-codex-spark',
  'chatgpt-4o-latest',