fix(openai): 修复 gpt-5.4 长上下文计费与快照白名单
补齐 gpt-5.4 fallback 的长上下文计费元信息,
确保超过 272000 输入 token 时对整次会话应用
2x 输入与 1.5x 输出计费规则。

同时将官方快照 gpt-5.4-2026-03-05 加入前端
白名单候选与回归测试,避免 whitelist 模式误拦截。

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
(cherry picked from commit d95497af87f608c6dadcbe7d6e851de9413ae147)
This commit is contained in:
@@ -43,15 +43,24 @@ type BillingCache interface {
|
|||||||
|
|
||||||
// ModelPricing 模型价格配置(per-token价格,与LiteLLM格式一致)
|
// ModelPricing 模型价格配置(per-token价格,与LiteLLM格式一致)
|
||||||
type ModelPricing struct {
|
type ModelPricing struct {
|
||||||
InputPricePerToken float64 // 每token输入价格 (USD)
|
InputPricePerToken float64 // 每token输入价格 (USD)
|
||||||
OutputPricePerToken float64 // 每token输出价格 (USD)
|
OutputPricePerToken float64 // 每token输出价格 (USD)
|
||||||
CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
|
CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
|
||||||
CacheReadPricePerToken float64 // 缓存读取每token价格 (USD)
|
CacheReadPricePerToken float64 // 缓存读取每token价格 (USD)
|
||||||
CacheCreation5mPrice float64 // 5分钟缓存创建每token价格 (USD)
|
CacheCreation5mPrice float64 // 5分钟缓存创建每token价格 (USD)
|
||||||
CacheCreation1hPrice float64 // 1小时缓存创建每token价格 (USD)
|
CacheCreation1hPrice float64 // 1小时缓存创建每token价格 (USD)
|
||||||
SupportsCacheBreakdown bool // 是否支持详细的缓存分类
|
SupportsCacheBreakdown bool // 是否支持详细的缓存分类
|
||||||
|
LongContextInputThreshold int // 超过阈值后按整次会话提升输入价格
|
||||||
|
LongContextInputMultiplier float64 // 长上下文整次会话输入倍率
|
||||||
|
LongContextOutputMultiplier float64 // 长上下文整次会话输出倍率
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Default long-context billing parameters for OpenAI GPT-5.4. They seed the
// "gpt-5.4" fallback price entry and backfill pricing records that lack
// long-context metadata.
const (
|
||||||
|
openAIGPT54LongContextInputThreshold = 272000 // input-token count that must be exceeded before the multipliers apply
|
||||||
|
openAIGPT54LongContextInputMultiplier = 2.0 // factor applied to the per-token input price
|
||||||
|
openAIGPT54LongContextOutputMultiplier = 1.5 // factor applied to the per-token output price
|
||||||
|
)
|
||||||
|
|
||||||
// UsageTokens 使用的token数量
|
// UsageTokens 使用的token数量
|
||||||
type UsageTokens struct {
|
type UsageTokens struct {
|
||||||
InputTokens int
|
InputTokens int
|
||||||
@@ -172,11 +181,14 @@ func (s *BillingService) initFallbackPricing() {
|
|||||||
}
|
}
|
||||||
// OpenAI GPT-5.4(业务指定价格)
|
// OpenAI GPT-5.4(业务指定价格)
|
||||||
s.fallbackPrices["gpt-5.4"] = &ModelPricing{
|
s.fallbackPrices["gpt-5.4"] = &ModelPricing{
|
||||||
InputPricePerToken: 2.5e-6, // $2.5 per MTok
|
InputPricePerToken: 2.5e-6, // $2.5 per MTok
|
||||||
OutputPricePerToken: 15e-6, // $15 per MTok
|
OutputPricePerToken: 15e-6, // $15 per MTok
|
||||||
CacheCreationPricePerToken: 2.5e-6, // $2.5 per MTok
|
CacheCreationPricePerToken: 2.5e-6, // $2.5 per MTok
|
||||||
CacheReadPricePerToken: 0.25e-6, // $0.25 per MTok
|
CacheReadPricePerToken: 0.25e-6, // $0.25 per MTok
|
||||||
SupportsCacheBreakdown: false,
|
SupportsCacheBreakdown: false,
|
||||||
|
LongContextInputThreshold: openAIGPT54LongContextInputThreshold,
|
||||||
|
LongContextInputMultiplier: openAIGPT54LongContextInputMultiplier,
|
||||||
|
LongContextOutputMultiplier: openAIGPT54LongContextOutputMultiplier,
|
||||||
}
|
}
|
||||||
// Codex 族兜底统一按 GPT-5.1 Codex 价格计费
|
// Codex 族兜底统一按 GPT-5.1 Codex 价格计费
|
||||||
s.fallbackPrices["gpt-5.1-codex"] = &ModelPricing{
|
s.fallbackPrices["gpt-5.1-codex"] = &ModelPricing{
|
||||||
@@ -256,15 +268,18 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
|
|||||||
price5m := litellmPricing.CacheCreationInputTokenCost
|
price5m := litellmPricing.CacheCreationInputTokenCost
|
||||||
price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
|
price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
|
||||||
enableBreakdown := price1h > 0 && price1h > price5m
|
enableBreakdown := price1h > 0 && price1h > price5m
|
||||||
return &ModelPricing{
|
return s.applyModelSpecificPricingPolicy(model, &ModelPricing{
|
||||||
InputPricePerToken: litellmPricing.InputCostPerToken,
|
InputPricePerToken: litellmPricing.InputCostPerToken,
|
||||||
OutputPricePerToken: litellmPricing.OutputCostPerToken,
|
OutputPricePerToken: litellmPricing.OutputCostPerToken,
|
||||||
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
|
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
|
||||||
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
|
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
|
||||||
CacheCreation5mPrice: price5m,
|
CacheCreation5mPrice: price5m,
|
||||||
CacheCreation1hPrice: price1h,
|
CacheCreation1hPrice: price1h,
|
||||||
SupportsCacheBreakdown: enableBreakdown,
|
SupportsCacheBreakdown: enableBreakdown,
|
||||||
}, nil
|
LongContextInputThreshold: litellmPricing.LongContextInputTokenThreshold,
|
||||||
|
LongContextInputMultiplier: litellmPricing.LongContextInputCostMultiplier,
|
||||||
|
LongContextOutputMultiplier: litellmPricing.LongContextOutputCostMultiplier,
|
||||||
|
}), nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -272,7 +287,7 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
|
|||||||
fallback := s.getFallbackPricing(model)
|
fallback := s.getFallbackPricing(model)
|
||||||
if fallback != nil {
|
if fallback != nil {
|
||||||
log.Printf("[Billing] Using fallback pricing for model: %s", model)
|
log.Printf("[Billing] Using fallback pricing for model: %s", model)
|
||||||
return fallback, nil
|
return s.applyModelSpecificPricingPolicy(model, fallback), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("pricing not found for model: %s", model)
|
return nil, fmt.Errorf("pricing not found for model: %s", model)
|
||||||
@@ -286,12 +301,18 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
|
|||||||
}
|
}
|
||||||
|
|
||||||
breakdown := &CostBreakdown{}
|
breakdown := &CostBreakdown{}
|
||||||
|
inputPricePerToken := pricing.InputPricePerToken
|
||||||
|
outputPricePerToken := pricing.OutputPricePerToken
|
||||||
|
if s.shouldApplySessionLongContextPricing(tokens, pricing) {
|
||||||
|
inputPricePerToken *= pricing.LongContextInputMultiplier
|
||||||
|
outputPricePerToken *= pricing.LongContextOutputMultiplier
|
||||||
|
}
|
||||||
|
|
||||||
// 计算输入token费用(使用per-token价格)
|
// 计算输入token费用(使用per-token价格)
|
||||||
breakdown.InputCost = float64(tokens.InputTokens) * pricing.InputPricePerToken
|
breakdown.InputCost = float64(tokens.InputTokens) * inputPricePerToken
|
||||||
|
|
||||||
// 计算输出token费用
|
// 计算输出token费用
|
||||||
breakdown.OutputCost = float64(tokens.OutputTokens) * pricing.OutputPricePerToken
|
breakdown.OutputCost = float64(tokens.OutputTokens) * outputPricePerToken
|
||||||
|
|
||||||
// 计算缓存费用
|
// 计算缓存费用
|
||||||
if pricing.SupportsCacheBreakdown && (pricing.CacheCreation5mPrice > 0 || pricing.CacheCreation1hPrice > 0) {
|
if pricing.SupportsCacheBreakdown && (pricing.CacheCreation5mPrice > 0 || pricing.CacheCreation1hPrice > 0) {
|
||||||
@@ -323,6 +344,45 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
|
|||||||
return breakdown, nil
|
return breakdown, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s *BillingService) applyModelSpecificPricingPolicy(model string, pricing *ModelPricing) *ModelPricing {
|
||||||
|
if pricing == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if !isOpenAIGPT54Model(model) {
|
||||||
|
return pricing
|
||||||
|
}
|
||||||
|
if pricing.LongContextInputThreshold > 0 && pricing.LongContextInputMultiplier > 0 && pricing.LongContextOutputMultiplier > 0 {
|
||||||
|
return pricing
|
||||||
|
}
|
||||||
|
cloned := *pricing
|
||||||
|
if cloned.LongContextInputThreshold <= 0 {
|
||||||
|
cloned.LongContextInputThreshold = openAIGPT54LongContextInputThreshold
|
||||||
|
}
|
||||||
|
if cloned.LongContextInputMultiplier <= 0 {
|
||||||
|
cloned.LongContextInputMultiplier = openAIGPT54LongContextInputMultiplier
|
||||||
|
}
|
||||||
|
if cloned.LongContextOutputMultiplier <= 0 {
|
||||||
|
cloned.LongContextOutputMultiplier = openAIGPT54LongContextOutputMultiplier
|
||||||
|
}
|
||||||
|
return &cloned
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *BillingService) shouldApplySessionLongContextPricing(tokens UsageTokens, pricing *ModelPricing) bool {
|
||||||
|
if pricing == nil || pricing.LongContextInputThreshold <= 0 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
if pricing.LongContextInputMultiplier <= 1 && pricing.LongContextOutputMultiplier <= 1 {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
totalInputTokens := tokens.InputTokens + tokens.CacheReadTokens
|
||||||
|
return totalInputTokens > pricing.LongContextInputThreshold
|
||||||
|
}
|
||||||
|
|
||||||
|
func isOpenAIGPT54Model(model string) bool {
|
||||||
|
normalized := normalizeCodexModel(strings.TrimSpace(strings.ToLower(model)))
|
||||||
|
return normalized == "gpt-5.4"
|
||||||
|
}
|
||||||
|
|
||||||
// CalculateCostWithConfig 使用配置中的默认倍率计算费用
|
// CalculateCostWithConfig 使用配置中的默认倍率计算费用
|
||||||
func (s *BillingService) CalculateCostWithConfig(model string, tokens UsageTokens) (*CostBreakdown, error) {
|
func (s *BillingService) CalculateCostWithConfig(model string, tokens UsageTokens) (*CostBreakdown, error) {
|
||||||
multiplier := s.cfg.Default.RateMultiplier
|
multiplier := s.cfg.Default.RateMultiplier
|
||||||
|
|||||||
@@ -169,6 +169,28 @@ func TestGetModelPricing_OpenAIGPT54Fallback(t *testing.T) {
|
|||||||
require.InDelta(t, 2.5e-6, pricing.InputPricePerToken, 1e-12)
|
require.InDelta(t, 2.5e-6, pricing.InputPricePerToken, 1e-12)
|
||||||
require.InDelta(t, 15e-6, pricing.OutputPricePerToken, 1e-12)
|
require.InDelta(t, 15e-6, pricing.OutputPricePerToken, 1e-12)
|
||||||
require.InDelta(t, 0.25e-6, pricing.CacheReadPricePerToken, 1e-12)
|
require.InDelta(t, 0.25e-6, pricing.CacheReadPricePerToken, 1e-12)
|
||||||
|
require.Equal(t, 272000, pricing.LongContextInputThreshold)
|
||||||
|
require.InDelta(t, 2.0, pricing.LongContextInputMultiplier, 1e-12)
|
||||||
|
require.InDelta(t, 1.5, pricing.LongContextOutputMultiplier, 1e-12)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers bills
// 300000 input tokens and 4000 output tokens against the dated snapshot name
// "gpt-5.4-2026-03-05" with a rate multiplier of 1.0. It expects the whole
// input cost at 2.0x of $2.5/MTok, the whole output cost at 1.5x of $15/MTok,
// and both TotalCost and ActualCost equal to their sum.
func TestCalculateCost_OpenAIGPT54LongContextAppliesWholeSessionMultipliers(t *testing.T) {
|
||||||
|
svc := newTestBillingService()
|
||||||
|
|
||||||
|
tokens := UsageTokens{
|
||||||
|
InputTokens: 300000,
|
||||||
|
OutputTokens: 4000,
|
||||||
|
}
|
||||||
|
|
||||||
|
cost, err := svc.CalculateCost("gpt-5.4-2026-03-05", tokens, 1.0)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
expectedInput := float64(tokens.InputTokens) * 2.5e-6 * 2.0
|
||||||
|
expectedOutput := float64(tokens.OutputTokens) * 15e-6 * 1.5
|
||||||
|
require.InDelta(t, expectedInput, cost.InputCost, 1e-10)
|
||||||
|
require.InDelta(t, expectedOutput, cost.OutputCost, 1e-10)
|
||||||
|
require.InDelta(t, expectedInput+expectedOutput, cost.TotalCost, 1e-10)
|
||||||
|
require.InDelta(t, expectedInput+expectedOutput, cost.ActualCost, 1e-10)
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetFallbackPricing_FamilyMatching(t *testing.T) {
|
func TestGetFallbackPricing_FamilyMatching(t *testing.T) {
|
||||||
|
|||||||
@@ -24,12 +24,15 @@ var (
|
|||||||
openAIModelDatePattern = regexp.MustCompile(`-\d{8}$`)
|
openAIModelDatePattern = regexp.MustCompile(`-\d{8}$`)
|
||||||
openAIModelBasePattern = regexp.MustCompile(`^(gpt-\d+(?:\.\d+)?)(?:-|$)`)
|
openAIModelBasePattern = regexp.MustCompile(`^(gpt-\d+(?:\.\d+)?)(?:-|$)`)
|
||||||
openAIGPT54FallbackPricing = &LiteLLMModelPricing{
|
openAIGPT54FallbackPricing = &LiteLLMModelPricing{
|
||||||
InputCostPerToken: 2.5e-06, // $2.5 per MTok
|
InputCostPerToken: 2.5e-06, // $2.5 per MTok
|
||||||
OutputCostPerToken: 1.5e-05, // $15 per MTok
|
OutputCostPerToken: 1.5e-05, // $15 per MTok
|
||||||
CacheReadInputTokenCost: 2.5e-07, // $0.25 per MTok
|
CacheReadInputTokenCost: 2.5e-07, // $0.25 per MTok
|
||||||
LiteLLMProvider: "openai",
|
LongContextInputTokenThreshold: 272000,
|
||||||
Mode: "chat",
|
LongContextInputCostMultiplier: 2.0,
|
||||||
SupportsPromptCaching: true,
|
LongContextOutputCostMultiplier: 1.5,
|
||||||
|
LiteLLMProvider: "openai",
|
||||||
|
Mode: "chat",
|
||||||
|
SupportsPromptCaching: true,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -41,6 +44,9 @@ type LiteLLMModelPricing struct {
|
|||||||
CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"`
|
CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"`
|
||||||
CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
|
CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
|
||||||
CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"`
|
CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"`
|
||||||
|
LongContextInputTokenThreshold int `json:"long_context_input_token_threshold,omitempty"`
|
||||||
|
LongContextInputCostMultiplier float64 `json:"long_context_input_cost_multiplier,omitempty"`
|
||||||
|
LongContextOutputCostMultiplier float64 `json:"long_context_output_cost_multiplier,omitempty"`
|
||||||
LiteLLMProvider string `json:"litellm_provider"`
|
LiteLLMProvider string `json:"litellm_provider"`
|
||||||
Mode string `json:"mode"`
|
Mode string `json:"mode"`
|
||||||
SupportsPromptCaching bool `json:"supports_prompt_caching"`
|
SupportsPromptCaching bool `json:"supports_prompt_caching"`
|
||||||
|
|||||||
@@ -64,4 +64,7 @@ func TestGetModelPricing_Gpt54UsesStaticFallbackWhenRemoteMissing(t *testing.T)
|
|||||||
require.InDelta(t, 2.5e-6, got.InputCostPerToken, 1e-12)
|
require.InDelta(t, 2.5e-6, got.InputCostPerToken, 1e-12)
|
||||||
require.InDelta(t, 1.5e-5, got.OutputCostPerToken, 1e-12)
|
require.InDelta(t, 1.5e-5, got.OutputCostPerToken, 1e-12)
|
||||||
require.InDelta(t, 2.5e-7, got.CacheReadInputTokenCost, 1e-12)
|
require.InDelta(t, 2.5e-7, got.CacheReadInputTokenCost, 1e-12)
|
||||||
|
require.Equal(t, 272000, got.LongContextInputTokenThreshold)
|
||||||
|
require.InDelta(t, 2.0, got.LongContextInputCostMultiplier, 1e-12)
|
||||||
|
require.InDelta(t, 1.5, got.LongContextOutputCostMultiplier, 1e-12)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,6 +2,13 @@ import { describe, expect, it } from 'vitest'
|
|||||||
import { buildModelMappingObject, getModelsByPlatform } from '../useModelWhitelist'
|
import { buildModelMappingObject, getModelsByPlatform } from '../useModelWhitelist'
|
||||||
|
|
||||||
describe('useModelWhitelist', () => {
|
describe('useModelWhitelist', () => {
|
||||||
|
it('openai 模型列表包含 GPT-5.4 官方快照', () => {
|
||||||
|
const models = getModelsByPlatform('openai')
|
||||||
|
|
||||||
|
expect(models).toContain('gpt-5.4')
|
||||||
|
expect(models).toContain('gpt-5.4-2026-03-05')
|
||||||
|
})
|
||||||
|
|
||||||
it('antigravity 模型列表包含图片模型兼容项', () => {
|
it('antigravity 模型列表包含图片模型兼容项', () => {
|
||||||
const models = getModelsByPlatform('antigravity')
|
const models = getModelsByPlatform('antigravity')
|
||||||
|
|
||||||
@@ -15,4 +22,12 @@ describe('useModelWhitelist', () => {
|
|||||||
'gemini-3.1-flash-image': 'gemini-3.1-flash-image'
|
'gemini-3.1-flash-image': 'gemini-3.1-flash-image'
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
|
it('whitelist 模式会保留 GPT-5.4 官方快照的精确映射', () => {
|
||||||
|
const mapping = buildModelMappingObject('whitelist', ['gpt-5.4-2026-03-05'], [])
|
||||||
|
|
||||||
|
expect(mapping).toEqual({
|
||||||
|
'gpt-5.4-2026-03-05': 'gpt-5.4-2026-03-05'
|
||||||
|
})
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ const openaiModels = [
|
|||||||
'gpt-5.2', 'gpt-5.2-2025-12-11', 'gpt-5.2-chat-latest',
|
'gpt-5.2', 'gpt-5.2-2025-12-11', 'gpt-5.2-chat-latest',
|
||||||
'gpt-5.2-codex', 'gpt-5.2-pro', 'gpt-5.2-pro-2025-12-11',
|
'gpt-5.2-codex', 'gpt-5.2-pro', 'gpt-5.2-pro-2025-12-11',
|
||||||
// GPT-5.4 系列
|
// GPT-5.4 系列
|
||||||
'gpt-5.4',
|
'gpt-5.4', 'gpt-5.4-2026-03-05',
|
||||||
// GPT-5.3 系列
|
// GPT-5.3 系列
|
||||||
'gpt-5.3-codex', 'gpt-5.3-codex-spark',
|
'gpt-5.3-codex', 'gpt-5.3-codex-spark',
|
||||||
'chatgpt-4o-latest',
|
'chatgpt-4o-latest',
|
||||||
|
|||||||
Reference in New Issue
Block a user