diff --git a/backend/internal/service/billing_service.go b/backend/internal/service/billing_service.go
index fbc06fd4..d058c25a 100644
--- a/backend/internal/service/billing_service.go
+++ b/backend/internal/service/billing_service.go
@@ -43,15 +43,26 @@ type BillingCache interface {
 
 // ModelPricing 模型价格配置(per-token价格,与LiteLLM格式一致)
 type ModelPricing struct {
-	InputPricePerToken         float64 // 每token输入价格 (USD)
-	OutputPricePerToken        float64 // 每token输出价格 (USD)
-	CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
-	CacheReadPricePerToken     float64 // 缓存读取每token价格 (USD)
-	CacheCreation5mPrice       float64 // 5分钟缓存创建每token价格 (USD)
-	CacheCreation1hPrice       float64 // 1小时缓存创建每token价格 (USD)
-	SupportsCacheBreakdown     bool    // 是否支持详细的缓存分类
+	InputPricePerToken          float64 // input price per token (USD)
+	OutputPricePerToken         float64 // output price per token (USD)
+	CacheCreationPricePerToken  float64 // cache-creation price per token (USD)
+	CacheReadPricePerToken      float64 // cache-read price per token (USD)
+	CacheCreation5mPrice        float64 // 5-minute cache-creation price per token (USD)
+	CacheCreation1hPrice        float64 // 1-hour cache-creation price per token (USD)
+	SupportsCacheBreakdown      bool    // whether the detailed cache breakdown is supported
+	LongContextInputThreshold   int     // input-token threshold above which the whole session is billed at raised prices
+	LongContextInputMultiplier  float64 // whole-session input price multiplier for long context
+	LongContextOutputMultiplier float64 // whole-session output price multiplier for long context
 }
 
+// Default long-context pricing policy for OpenAI GPT-5.4, used by the fallback
+// price table and whenever the pricing source leaves these values unset.
+const (
+	openAIGPT54LongContextInputThreshold   = 272000
+	openAIGPT54LongContextInputMultiplier  = 2.0
+	openAIGPT54LongContextOutputMultiplier = 1.5
+)
+
 // UsageTokens 使用的token数量
 type UsageTokens struct {
 	InputTokens int
@@ -172,11 +183,14 @@ func (s *BillingService) initFallbackPricing() {
 	}
 	// OpenAI GPT-5.4(业务指定价格)
 	s.fallbackPrices["gpt-5.4"] = &ModelPricing{
-		InputPricePerToken:         2.5e-6,  // $2.5 per MTok
-		OutputPricePerToken:        15e-6,   // $15 per MTok
-		CacheCreationPricePerToken: 2.5e-6,  // $2.5 per MTok
-		CacheReadPricePerToken:     0.25e-6, // $0.25 per MTok
-		SupportsCacheBreakdown:     false,
+		InputPricePerToken:          2.5e-6,  // $2.5 per MTok
+		OutputPricePerToken:         15e-6,   // $15 per MTok
+		CacheCreationPricePerToken:  2.5e-6,  // $2.5 per MTok
+		CacheReadPricePerToken:      0.25e-6, // $0.25 per MTok
+		SupportsCacheBreakdown:      false,
+		LongContextInputThreshold:   openAIGPT54LongContextInputThreshold,
+		LongContextInputMultiplier:  openAIGPT54LongContextInputMultiplier,
+		LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
 	}
 	// Codex 族兜底统一按 GPT-5.1 Codex 价格计费
 	s.fallbackPrices["gpt-5.1-codex"] = &ModelPricing{
@@ -256,15 +270,18 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
 			price5m := litellmPricing.CacheCreationInputTokenCost
 			price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
 			enableBreakdown := price1h > 0 && price1h > price5m
-			return &ModelPricing{
-				InputPricePerToken:         litellmPricing.InputCostPerToken,
-				OutputPricePerToken:        litellmPricing.OutputCostPerToken,
-				CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
-				CacheReadPricePerToken:     litellmPricing.CacheReadInputTokenCost,
-				CacheCreation5mPrice:       price5m,
-				CacheCreation1hPrice:       price1h,
-				SupportsCacheBreakdown:     enableBreakdown,
-			}, nil
+			return s.applyModelSpecificPricingPolicy(model, &ModelPricing{
+				InputPricePerToken:          litellmPricing.InputCostPerToken,
+				OutputPricePerToken:         litellmPricing.OutputCostPerToken,
+				CacheCreationPricePerToken:  litellmPricing.CacheCreationInputTokenCost,
+				CacheReadPricePerToken:      litellmPricing.CacheReadInputTokenCost,
+				CacheCreation5mPrice:        price5m,
+				CacheCreation1hPrice:        price1h,
+				SupportsCacheBreakdown:      enableBreakdown,
+				LongContextInputThreshold:   litellmPricing.LongContextInputTokenThreshold,
+				LongContextInputMultiplier:  litellmPricing.LongContextInputCostMultiplier,
+				LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
+			}), nil
 		}
 	}
 
@@ -272,7 +289,7 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
 	fallback := s.getFallbackPricing(model)
 	if fallback != nil {
 		log.Printf("[Billing] Using fallback pricing for model: %s", model)
-		return fallback, nil
+		return s.applyModelSpecificPricingPolicy(model, fallback), nil
 	}
 
 	return nil, fmt.Errorf("pricing not found for model: %s", model)
@@ -286,12 +303,25 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
 	}
 
 	breakdown := &CostBreakdown{}
+	// Long-context sessions bill input and output at multiplied prices.
+	inputPricePerToken := pricing.InputPricePerToken
+	outputPricePerToken := pricing.OutputPricePerToken
+	if s.shouldApplySessionLongContextPricing(tokens, pricing) {
+		// A non-positive multiplier means "not configured"; skip it so a
+		// partially configured policy cannot zero out or negate a price.
+		if pricing.LongContextInputMultiplier > 0 {
+			inputPricePerToken *= pricing.LongContextInputMultiplier
+		}
+		if pricing.LongContextOutputMultiplier > 0 {
+			outputPricePerToken *= pricing.LongContextOutputMultiplier
+		}
+	}
 
 	// 计算输入token费用(使用per-token价格)
-	breakdown.InputCost = float64(tokens.InputTokens) * pricing.InputPricePerToken
+	breakdown.InputCost = float64(tokens.InputTokens) * inputPricePerToken
 
 	// 计算输出token费用
-	breakdown.OutputCost = float64(tokens.OutputTokens) * pricing.OutputPricePerToken
+	breakdown.OutputCost = float64(tokens.OutputTokens) * outputPricePerToken
 
 	// 计算缓存费用
 	if pricing.SupportsCacheBreakdown && (pricing.CacheCreation5mPrice > 0 || pricing.CacheCreation1hPrice > 0) {
@@ -323,6 +353,53 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
 	return breakdown, nil
 }
 
+// applyModelSpecificPricingPolicy fills in the GPT-5.4 long-context defaults
+// for any threshold or multiplier that is unset (<= 0). Pricing for other
+// models, and pricing that already carries all three values, is returned
+// as-is. Defaults are written to a copy, so the input is never mutated.
+func (s *BillingService) applyModelSpecificPricingPolicy(model string, pricing *ModelPricing) *ModelPricing {
+	if pricing == nil {
+		return nil
+	}
+	if !isOpenAIGPT54Model(model) {
+		return pricing
+	}
+	if pricing.LongContextInputThreshold > 0 && pricing.LongContextInputMultiplier > 0 && pricing.LongContextOutputMultiplier > 0 {
+		return pricing
+	}
+	cloned := *pricing
+	if cloned.LongContextInputThreshold <= 0 {
+		cloned.LongContextInputThreshold = openAIGPT54LongContextInputThreshold
+	}
+	if cloned.LongContextInputMultiplier <= 0 {
+		cloned.LongContextInputMultiplier = openAIGPT54LongContextInputMultiplier
+	}
+	if cloned.LongContextOutputMultiplier <= 0 {
+		cloned.LongContextOutputMultiplier = openAIGPT54LongContextOutputMultiplier
+	}
+	return &cloned
+}
+
+// shouldApplySessionLongContextPricing reports whether long-context pricing
+// applies: a threshold is configured, at least one multiplier is above 1, and
+// input plus cache-read tokens exceed the threshold.
+func (s *BillingService) shouldApplySessionLongContextPricing(tokens UsageTokens, pricing *ModelPricing) bool {
+	if pricing == nil || pricing.LongContextInputThreshold <= 0 {
+		return false
+	}
+	if pricing.LongContextInputMultiplier <= 1 && pricing.LongContextOutputMultiplier <= 1 {
+		return false
+	}
+	totalInputTokens := tokens.InputTokens + tokens.CacheReadTokens
+	return totalInputTokens > pricing.LongContextInputThreshold
+}
+
+// isOpenAIGPT54Model reports whether model normalizes to "gpt-5.4".
+func isOpenAIGPT54Model(model string) bool {
+	normalized := normalizeCodexModel(strings.TrimSpace(strings.ToLower(model)))
+	return normalized == "gpt-5.4"
+}
+
 // CalculateCostWithConfig 使用配置中的默认倍率计算费用
 func (s *BillingService) CalculateCostWithConfig(model string, tokens UsageTokens) (*CostBreakdown, error) {
 	multiplier := s.cfg.Default.RateMultiplier
diff --git a/backend/internal/service/billing_service_test.go b/backend/internal/service/billing_service_test.go
index 3ae6dc85..0ba52e56 100644
--- a/backend/internal/service/billing_service_test.go
+++ b/backend/internal/service/billing_service_test.go
@@ -169,6 +169,30 @@ func TestGetModelPricing_OpenAIGPT54Fallback(t *testing.T) {
 	require.InDelta(t, 2.5e-6, pricing.InputPricePerToken, 1e-12)
 	require.InDelta(t, 15e-6, pricing.OutputPricePerToken, 1e-12)
 	require.InDelta(t, 0.25e-6, pricing.CacheReadPricePerToken, 1e-12)
+	require.Equal(t, 272000, pricing.LongContextInputThreshold)
+	require.InDelta(t, 2.0, pricing.LongContextInputMultiplier, 1e-12)
+	require.InDelta(t, 1.5, pricing.LongContextOutputMultiplier, 1e-12)
+}
+
+// TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers checks
+// that input above the GPT-5.4 threshold is billed at 2x input and 1.5x output.
+func TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers(t *testing.T) {
+	svc := newTestBillingService()
+
+	tokens := UsageTokens{
+		InputTokens:  300000,
+		OutputTokens: 4000,
+	}
+
+	cost, err := svc.CalculateCost("gpt-5.4-2026-03-05", tokens, 1.0)
+	require.NoError(t, err)
+
+	expectedInput := float64(tokens.InputTokens) * 2.5e-6 * 2.0
+	expectedOutput := float64(tokens.OutputTokens) * 15e-6 * 1.5
+	require.InDelta(t, expectedInput, cost.InputCost, 1e-10)
+	require.InDelta(t, expectedOutput, cost.OutputCost, 1e-10)
+	require.InDelta(t, expectedInput+expectedOutput, cost.TotalCost, 1e-10)
+	require.InDelta(t, expectedInput+expectedOutput, cost.ActualCost, 1e-10)
 }
 
 func TestGetFallbackPricing_FamilyMatching(t *testing.T) {
diff --git a/backend/internal/service/pricing_service.go b/backend/internal/service/pricing_service.go
index b81a160a..897623d6 100644
--- a/backend/internal/service/pricing_service.go
+++ b/backend/internal/service/pricing_service.go
@@ -24,12 +24,15 @@ var (
 	openAIModelDatePattern     = regexp.MustCompile(`-\d{8}$`)
 	openAIModelBasePattern     = regexp.MustCompile(`^(gpt-\d+(?:\.\d+)?)(?:-|$)`)
 	openAIGPT54FallbackPricing = &LiteLLMModelPricing{
-		InputCostPerToken:       2.5e-06, // $2.5 per MTok
-		OutputCostPerToken:      1.5e-05, // $15 per MTok
-		CacheReadInputTokenCost: 2.5e-07, // $0.25 per MTok
-		LiteLLMProvider:         "openai",
-		Mode:                    "chat",
-		SupportsPromptCaching:   true,
+		InputCostPerToken:               2.5e-06, // $2.5 per MTok
+		OutputCostPerToken:              1.5e-05, // $15 per MTok
+		CacheReadInputTokenCost:         2.5e-07, // $0.25 per MTok
+		LongContextInputTokenThreshold:  272000,
+		LongContextInputCostMultiplier:  2.0,
+		LongContextOutputCostMultiplier: 1.5,
+		LiteLLMProvider:                 "openai",
+		Mode:                            "chat",
+		SupportsPromptCaching:           true,
 	}
 )
 
@@ -41,6 +44,9 @@ type LiteLLMModelPricing struct {
 	CacheCreationInputTokenCost         float64 `json:"cache_creation_input_token_cost"`
 	CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
 	CacheReadInputTokenCost             float64 `json:"cache_read_input_token_cost"`
+	LongContextInputTokenThreshold      int     `json:"long_context_input_token_threshold,omitempty"`
+	LongContextInputCostMultiplier      float64 `json:"long_context_input_cost_multiplier,omitempty"`
+	LongContextOutputCostMultiplier     float64 `json:"long_context_output_cost_multiplier,omitempty"`
 	LiteLLMProvider                     string  `json:"litellm_provider"`
 	Mode                                string  `json:"mode"`
 	SupportsPromptCaching               bool    `json:"supports_prompt_caching"`
diff --git a/backend/internal/service/pricing_service_test.go b/backend/internal/service/pricing_service_test.go
index de295283..6b67c55a 100644
--- a/backend/internal/service/pricing_service_test.go
+++ b/backend/internal/service/pricing_service_test.go
@@ -64,4 +64,7 @@ func TestGetModelPricing_Gpt54UsesStaticFallbackWhenRemoteMissing(t *testing.T)
 	require.InDelta(t, 2.5e-6, got.InputCostPerToken, 1e-12)
 	require.InDelta(t, 1.5e-5, got.OutputCostPerToken, 1e-12)
 	require.InDelta(t, 2.5e-7, got.CacheReadInputTokenCost, 1e-12)
+	require.Equal(t, 272000, got.LongContextInputTokenThreshold)
+	require.InDelta(t, 2.0, got.LongContextInputCostMultiplier, 1e-12)
+	require.InDelta(t, 1.5, got.LongContextOutputCostMultiplier, 1e-12)
 }
diff --git a/frontend/src/composables/__tests__/useModelWhitelist.spec.ts b/frontend/src/composables/__tests__/useModelWhitelist.spec.ts
index 4088e5a4..79c88a29 100644
--- a/frontend/src/composables/__tests__/useModelWhitelist.spec.ts
+++ b/frontend/src/composables/__tests__/useModelWhitelist.spec.ts
@@ -2,6 +2,13 @@ import { describe, expect, it } from 'vitest'
 import { buildModelMappingObject, getModelsByPlatform } from '../useModelWhitelist'
 
 describe('useModelWhitelist', () => {
+  it('openai 模型列表包含 GPT-5.4 官方快照', () => {
+    const models = getModelsByPlatform('openai')
+
+    expect(models).toContain('gpt-5.4')
+    expect(models).toContain('gpt-5.4-2026-03-05')
+  })
+
   it('antigravity 模型列表包含图片模型兼容项', () => {
     const models = getModelsByPlatform('antigravity')
 
@@ -15,4 +22,12 @@ describe('useModelWhitelist', () => {
       'gemini-3.1-flash-image': 'gemini-3.1-flash-image'
     })
   })
+
+  it('whitelist 模式会保留 GPT-5.4 官方快照的精确映射', () => {
+    const mapping = buildModelMappingObject('whitelist', ['gpt-5.4-2026-03-05'], [])
+
+    expect(mapping).toEqual({
+      'gpt-5.4-2026-03-05': 'gpt-5.4-2026-03-05'
+    })
+  })
 })
diff --git a/frontend/src/composables/useModelWhitelist.ts b/frontend/src/composables/useModelWhitelist.ts
index 8e525873..986a99f4 100644
--- a/frontend/src/composables/useModelWhitelist.ts
+++ b/frontend/src/composables/useModelWhitelist.ts
@@ -25,7 +25,7 @@ const openaiModels = [
   'gpt-5.2', 'gpt-5.2-2025-12-11', 'gpt-5.2-chat-latest',
   'gpt-5.2-codex', 'gpt-5.2-pro', 'gpt-5.2-pro-2025-12-11',
   // GPT-5.4 系列
-  'gpt-5.4',
+  'gpt-5.4', 'gpt-5.4-2026-03-05',
   // GPT-5.3 系列
   'gpt-5.3-codex', 'gpt-5.3-codex-spark',
   'chatgpt-4o-latest',