feat: 区分 Anthropic 5m/1h 缓存创建 token 的差异化计费

Anthropic API 的 cache_creation 对象区分了 ephemeral_5m 和 ephemeral_1h
两种缓存创建 token,1h 单价远高于 5m(如 claude-3-5-haiku: 5m=$1/MTok,
1h=$6/MTok)。此前系统统一按 5m 单价计费,导致计费偏低。

后端:
- pricing_service: 加载 LiteLLM 的 cache_creation_input_token_cost_above_1hr
- billing_service: GetModelPricing 启用分类计费(安全守卫 1h>5m),
  CalculateCost 按 5m/1h 分别计费,无明细时回退到 5m 单价
- gateway_service: parseSSEUsage/handleNonStreamingResponse 用 gjson
  提取嵌套 cache_creation 对象的 ephemeral_5m/1h_input_tokens
- antigravity_gateway_service: extractSSEUsage/extractClaudeUsage 同步提取
- usage_log: 修复 GORM column tag 确保写入正确的数据库列
- 新增迁移 054: 删除 GORM 自动生成的重复列

前端:
- 使用记录 tooltip 展示 5m/1h 缓存创建明细(带彩色 badge 区分)
- 表格单元格缓存写入数值旁显示 1h 标识
This commit is contained in:
shaw
2026-02-14 18:15:35 +08:00
parent 2857fa2ef7
commit a817cafe3d
10 changed files with 174 additions and 42 deletions

View File

@@ -31,8 +31,8 @@ type ModelPricing struct {
OutputPricePerToken float64 // 每token输出价格 (USD)
CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD)
CacheReadPricePerToken float64 // 缓存读取每token价格 (USD)
CacheCreation5mPrice float64 // 5分钟缓存创建价格每百万token- 仅用于硬编码回退
CacheCreation1hPrice float64 // 1小时缓存创建价格每百万token- 仅用于硬编码回退
CacheCreation5mPrice float64 // 5分钟缓存创建每token价格 (USD)
CacheCreation1hPrice float64 // 1小时缓存创建每token价格 (USD)
SupportsCacheBreakdown bool // 是否支持详细的缓存分类
}
@@ -172,12 +172,20 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) {
if s.pricingService != nil {
litellmPricing := s.pricingService.GetModelPricing(model)
if litellmPricing != nil {
// 启用 5m/1h 分类计费的条件:
// 1. 存在 1h 价格
// 2. 1h 价格 > 5m 价格(防止 LiteLLM 数据错误导致少收费)
price5m := litellmPricing.CacheCreationInputTokenCost
price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr
enableBreakdown := price1h > 0 && price1h > price5m
return &ModelPricing{
InputPricePerToken: litellmPricing.InputCostPerToken,
OutputPricePerToken: litellmPricing.OutputCostPerToken,
CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost,
CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost,
SupportsCacheBreakdown: false,
CacheCreation5mPrice: price5m,
CacheCreation1hPrice: price1h,
SupportsCacheBreakdown: enableBreakdown,
}, nil
}
}
@@ -209,9 +217,14 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul
// 计算缓存费用
if pricing.SupportsCacheBreakdown && (pricing.CacheCreation5mPrice > 0 || pricing.CacheCreation1hPrice > 0) {
// 支持详细缓存分类的模型5分钟/1小时缓存
breakdown.CacheCreationCost = float64(tokens.CacheCreation5mTokens)/1_000_000*pricing.CacheCreation5mPrice +
float64(tokens.CacheCreation1hTokens)/1_000_000*pricing.CacheCreation1hPrice
// 支持详细缓存分类的模型5分钟/1小时缓存,价格为 per-token
if tokens.CacheCreation5mTokens == 0 && tokens.CacheCreation1hTokens == 0 && tokens.CacheCreationTokens > 0 {
// API 未返回 ephemeral 明细,回退到全部按 5m 单价计费
breakdown.CacheCreationCost = float64(tokens.CacheCreationTokens) * pricing.CacheCreation5mPrice
} else {
breakdown.CacheCreationCost = float64(tokens.CacheCreation5mTokens)*pricing.CacheCreation5mPrice +
float64(tokens.CacheCreation1hTokens)*pricing.CacheCreation1hPrice
}
} else {
// 标准缓存创建价格per-token
breakdown.CacheCreationCost = float64(tokens.CacheCreationTokens) * pricing.CacheCreationPricePerToken
@@ -280,10 +293,12 @@ func (s *BillingService) CalculateCostWithLongContext(model string, tokens Usage
// 范围内部分:正常计费
inRangeTokens := UsageTokens{
InputTokens: inRangeInputTokens,
OutputTokens: tokens.OutputTokens, // 输出只算一次
CacheCreationTokens: tokens.CacheCreationTokens,
CacheReadTokens: inRangeCacheTokens,
InputTokens: inRangeInputTokens,
OutputTokens: tokens.OutputTokens, // 输出只算一次
CacheCreationTokens: tokens.CacheCreationTokens,
CacheReadTokens: inRangeCacheTokens,
CacheCreation5mTokens: tokens.CacheCreation5mTokens,
CacheCreation1hTokens: tokens.CacheCreation1hTokens,
}
inRangeCost, err := s.CalculateCost(model, inRangeTokens, rateMultiplier)
if err != nil {