diff --git a/backend/internal/service/antigravity_gateway_service.go b/backend/internal/service/antigravity_gateway_service.go index 9e7ed7d5..1d87f4b1 100644 --- a/backend/internal/service/antigravity_gateway_service.go +++ b/backend/internal/service/antigravity_gateway_service.go @@ -4111,6 +4111,15 @@ func (s *AntigravityGatewayService) extractSSEUsage(line string, usage *ClaudeUs if v, ok := u["cache_creation_input_tokens"].(float64); ok && int(v) > 0 { usage.CacheCreationInputTokens = int(v) } + // 解析嵌套的 cache_creation 对象中的 5m/1h 明细 + if cc, ok := u["cache_creation"].(map[string]any); ok { + if v, ok := cc["ephemeral_5m_input_tokens"].(float64); ok { + usage.CacheCreation5mTokens = int(v) + } + if v, ok := cc["ephemeral_1h_input_tokens"].(float64); ok { + usage.CacheCreation1hTokens = int(v) + } + } } // extractClaudeUsage 从非流式 Claude 响应提取 usage @@ -4133,6 +4142,15 @@ func (s *AntigravityGatewayService) extractClaudeUsage(body []byte) *ClaudeUsage if v, ok := u["cache_creation_input_tokens"].(float64); ok { usage.CacheCreationInputTokens = int(v) } + // 解析嵌套的 cache_creation 对象中的 5m/1h 明细 + if cc, ok := u["cache_creation"].(map[string]any); ok { + if v, ok := cc["ephemeral_5m_input_tokens"].(float64); ok { + usage.CacheCreation5mTokens = int(v) + } + if v, ok := cc["ephemeral_1h_input_tokens"].(float64); ok { + usage.CacheCreation1hTokens = int(v) + } + } } return usage } diff --git a/backend/internal/service/billing_service.go b/backend/internal/service/billing_service.go index db5a9708..6934bc64 100644 --- a/backend/internal/service/billing_service.go +++ b/backend/internal/service/billing_service.go @@ -31,8 +31,8 @@ type ModelPricing struct { OutputPricePerToken float64 // 每token输出价格 (USD) CacheCreationPricePerToken float64 // 缓存创建每token价格 (USD) CacheReadPricePerToken float64 // 缓存读取每token价格 (USD) - CacheCreation5mPrice float64 // 5分钟缓存创建价格(每百万token)- 仅用于硬编码回退 - CacheCreation1hPrice float64 // 1小时缓存创建价格(每百万token)- 仅用于硬编码回退 + CacheCreation5mPrice float64 // 5分钟缓存创建每token价格 (USD) + CacheCreation1hPrice float64 // 1小时缓存创建每token价格 (USD) SupportsCacheBreakdown bool // 是否支持详细的缓存分类 } @@ -172,12 +172,20 @@ func (s *BillingService) GetModelPricing(model string) (*ModelPricing, error) { if s.pricingService != nil { litellmPricing := s.pricingService.GetModelPricing(model) if litellmPricing != nil { + // 启用 5m/1h 分类计费的条件: + // 1. 存在 1h 价格 + // 2. 1h 价格 > 5m 价格(防止 LiteLLM 数据错误导致少收费) + price5m := litellmPricing.CacheCreationInputTokenCost + price1h := litellmPricing.CacheCreationInputTokenCostAbove1hr + enableBreakdown := price1h > 0 && price1h > price5m return &ModelPricing{ InputPricePerToken: litellmPricing.InputCostPerToken, OutputPricePerToken: litellmPricing.OutputCostPerToken, CacheCreationPricePerToken: litellmPricing.CacheCreationInputTokenCost, CacheReadPricePerToken: litellmPricing.CacheReadInputTokenCost, - SupportsCacheBreakdown: false, + CacheCreation5mPrice: price5m, + CacheCreation1hPrice: price1h, + SupportsCacheBreakdown: enableBreakdown, }, nil } } @@ -209,9 +217,14 @@ func (s *BillingService) CalculateCost(model string, tokens UsageTokens, rateMul // 计算缓存费用 if pricing.SupportsCacheBreakdown && (pricing.CacheCreation5mPrice > 0 || pricing.CacheCreation1hPrice > 0) { - // 支持详细缓存分类的模型(5分钟/1小时缓存) - breakdown.CacheCreationCost = float64(tokens.CacheCreation5mTokens)/1_000_000*pricing.CacheCreation5mPrice + - float64(tokens.CacheCreation1hTokens)/1_000_000*pricing.CacheCreation1hPrice + // 支持详细缓存分类的模型(5分钟/1小时缓存,价格为 per-token) + if tokens.CacheCreation5mTokens == 0 && tokens.CacheCreation1hTokens == 0 && tokens.CacheCreationTokens > 0 { + // API 未返回 ephemeral 明细,回退到全部按 5m 单价计费 + breakdown.CacheCreationCost = float64(tokens.CacheCreationTokens) * pricing.CacheCreation5mPrice + } else { + breakdown.CacheCreationCost = float64(tokens.CacheCreation5mTokens)*pricing.CacheCreation5mPrice + + float64(tokens.CacheCreation1hTokens)*pricing.CacheCreation1hPrice + } } else { // 标准缓存创建价格(per-token) breakdown.CacheCreationCost = float64(tokens.CacheCreationTokens) * pricing.CacheCreationPricePerToken @@ -280,10 +293,12 @@ func (s *BillingService) CalculateCostWithLongContext(model string, tokens Usage // 范围内部分:正常计费 inRangeTokens := UsageTokens{ - InputTokens: inRangeInputTokens, - OutputTokens: tokens.OutputTokens, // 输出只算一次 - CacheCreationTokens: tokens.CacheCreationTokens, - CacheReadTokens: inRangeCacheTokens, + InputTokens: inRangeInputTokens, + OutputTokens: tokens.OutputTokens, // 输出只算一次 + CacheCreationTokens: tokens.CacheCreationTokens, + CacheReadTokens: inRangeCacheTokens, + CacheCreation5mTokens: tokens.CacheCreation5mTokens, + CacheCreation1hTokens: tokens.CacheCreation1hTokens, } inRangeCost, err := s.CalculateCost(model, inRangeTokens, rateMultiplier) if err != nil { diff --git a/backend/internal/service/gateway_service.go b/backend/internal/service/gateway_service.go index 56af4610..7c3d7f3a 100644 --- a/backend/internal/service/gateway_service.go +++ b/backend/internal/service/gateway_service.go @@ -349,6 +349,8 @@ type ClaudeUsage struct { OutputTokens int `json:"output_tokens"` CacheCreationInputTokens int `json:"cache_creation_input_tokens"` CacheReadInputTokens int `json:"cache_read_input_tokens"` + CacheCreation5mTokens int // 5分钟缓存创建token(来自嵌套 cache_creation 对象) + CacheCreation1hTokens int // 1小时缓存创建token(来自嵌套 cache_creation 对象) } // ForwardResult 转发结果 @@ -4401,6 +4403,14 @@ func (s *GatewayService) parseSSEUsage(data string, usage *ClaudeUsage) { usage.InputTokens = msgStart.Message.Usage.InputTokens usage.CacheCreationInputTokens = msgStart.Message.Usage.CacheCreationInputTokens usage.CacheReadInputTokens = msgStart.Message.Usage.CacheReadInputTokens + + // 解析嵌套的 cache_creation 对象中的 5m/1h 明细 + cc5m := gjson.Get(data, "message.usage.cache_creation.ephemeral_5m_input_tokens") + cc1h := gjson.Get(data, "message.usage.cache_creation.ephemeral_1h_input_tokens") + if cc5m.Exists() || cc1h.Exists() { + usage.CacheCreation5mTokens = int(cc5m.Int()) + usage.CacheCreation1hTokens = int(cc1h.Int()) + } } // 解析message_delta获取tokens(兼容GLM等把所有usage放在delta中的API) @@ -4429,6 +4439,14 @@ func (s *GatewayService) parseSSEUsage(data string, usage *ClaudeUsage) { if msgDelta.Usage.CacheReadInputTokens > 0 { usage.CacheReadInputTokens = msgDelta.Usage.CacheReadInputTokens } + + // 解析嵌套的 cache_creation 对象中的 5m/1h 明细 + cc5m := gjson.Get(data, "usage.cache_creation.ephemeral_5m_input_tokens") + cc1h := gjson.Get(data, "usage.cache_creation.ephemeral_1h_input_tokens") + if cc5m.Exists() || cc1h.Exists() { + usage.CacheCreation5mTokens = int(cc5m.Int()) + usage.CacheCreation1hTokens = int(cc1h.Int()) + } } } @@ -4449,6 +4467,14 @@ func (s *GatewayService) handleNonStreamingResponse(ctx context.Context, resp *h return nil, fmt.Errorf("parse response: %w", err) } + // 解析嵌套的 cache_creation 对象中的 5m/1h 明细 + cc5m := gjson.GetBytes(body, "usage.cache_creation.ephemeral_5m_input_tokens") + cc1h := gjson.GetBytes(body, "usage.cache_creation.ephemeral_1h_input_tokens") + if cc5m.Exists() || cc1h.Exists() { + response.Usage.CacheCreation5mTokens = int(cc5m.Int()) + response.Usage.CacheCreation1hTokens = int(cc1h.Int()) + } + // 兼容 Kimi cached_tokens → cache_read_input_tokens if response.Usage.CacheReadInputTokens == 0 { cachedTokens := gjson.GetBytes(body, "usage.cached_tokens").Int() @@ -4566,10 +4592,12 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu } else { // Token 计费 tokens := UsageTokens{ - InputTokens: result.Usage.InputTokens, - OutputTokens: result.Usage.OutputTokens, - CacheCreationTokens: result.Usage.CacheCreationInputTokens, - CacheReadTokens: result.Usage.CacheReadInputTokens, + InputTokens: result.Usage.InputTokens, + OutputTokens: result.Usage.OutputTokens, + CacheCreationTokens: result.Usage.CacheCreationInputTokens, + CacheReadTokens: result.Usage.CacheReadInputTokens, + CacheCreation5mTokens: result.Usage.CacheCreation5mTokens, + CacheCreation1hTokens: result.Usage.CacheCreation1hTokens, } var err error cost, err = s.billingService.CalculateCost(result.Model, tokens, multiplier) @@ -4603,6 +4631,8 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu OutputTokens: result.Usage.OutputTokens, CacheCreationTokens: result.Usage.CacheCreationInputTokens, CacheReadTokens: result.Usage.CacheReadInputTokens, + CacheCreation5mTokens: result.Usage.CacheCreation5mTokens, + CacheCreation1hTokens: result.Usage.CacheCreation1hTokens, InputCost: cost.InputCost, OutputCost: cost.OutputCost, CacheCreationCost: cost.CacheCreationCost, @@ -4747,10 +4777,12 @@ func (s *GatewayService) RecordUsageWithLongContext(ctx context.Context, input * } else { // Token 计费(使用长上下文计费方法) tokens := UsageTokens{ - InputTokens: result.Usage.InputTokens, - OutputTokens: result.Usage.OutputTokens, - CacheCreationTokens: result.Usage.CacheCreationInputTokens, - CacheReadTokens: result.Usage.CacheReadInputTokens, + InputTokens: result.Usage.InputTokens, + OutputTokens: result.Usage.OutputTokens, + CacheCreationTokens: result.Usage.CacheCreationInputTokens, + CacheReadTokens: result.Usage.CacheReadInputTokens, + CacheCreation5mTokens: result.Usage.CacheCreation5mTokens, + CacheCreation1hTokens: result.Usage.CacheCreation1hTokens, } var err error cost, err = s.billingService.CalculateCostWithLongContext(result.Model, tokens, multiplier, input.LongContextThreshold, input.LongContextMultiplier) @@ -4784,6 +4816,8 @@ func (s *GatewayService) RecordUsageWithLongContext(ctx context.Context, input * OutputTokens: result.Usage.OutputTokens, CacheCreationTokens: result.Usage.CacheCreationInputTokens, CacheReadTokens: result.Usage.CacheReadInputTokens, + CacheCreation5mTokens: result.Usage.CacheCreation5mTokens, + CacheCreation1hTokens: result.Usage.CacheCreation1hTokens, InputCost: cost.InputCost, OutputCost: cost.OutputCost, CacheCreationCost: cost.CacheCreationCost, diff --git a/backend/internal/service/pricing_service.go b/backend/internal/service/pricing_service.go index d8db0d67..a3a94189 100644 --- a/backend/internal/service/pricing_service.go +++ b/backend/internal/service/pricing_service.go @@ -27,14 +27,15 @@ var ( // LiteLLMModelPricing LiteLLM价格数据结构 // 只保留我们需要的字段,使用指针来处理可能缺失的值 type LiteLLMModelPricing struct { - InputCostPerToken float64 `json:"input_cost_per_token"` - OutputCostPerToken float64 `json:"output_cost_per_token"` - CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"` - CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"` - LiteLLMProvider string `json:"litellm_provider"` - Mode string `json:"mode"` - SupportsPromptCaching bool `json:"supports_prompt_caching"` - OutputCostPerImage float64 `json:"output_cost_per_image"` // 图片生成模型每张图片价格 + InputCostPerToken float64 `json:"input_cost_per_token"` + OutputCostPerToken float64 `json:"output_cost_per_token"` + CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"` + CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"` + CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"` + LiteLLMProvider string `json:"litellm_provider"` + Mode string `json:"mode"` + SupportsPromptCaching bool `json:"supports_prompt_caching"` + OutputCostPerImage float64 `json:"output_cost_per_image"` // 图片生成模型每张图片价格 } // PricingRemoteClient 远程价格数据获取接口 @@ -45,14 +46,15 @@ type PricingRemoteClient interface { // LiteLLMRawEntry 用于解析原始JSON数据 type LiteLLMRawEntry struct { - InputCostPerToken *float64 `json:"input_cost_per_token"` - OutputCostPerToken *float64 `json:"output_cost_per_token"` - CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost"` - CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost"` - LiteLLMProvider string `json:"litellm_provider"` - Mode string `json:"mode"` - SupportsPromptCaching bool `json:"supports_prompt_caching"` - OutputCostPerImage *float64 `json:"output_cost_per_image"` + InputCostPerToken *float64 `json:"input_cost_per_token"` + OutputCostPerToken *float64 `json:"output_cost_per_token"` + CacheCreationInputTokenCost *float64 `json:"cache_creation_input_token_cost"` + CacheCreationInputTokenCostAbove1hr *float64 `json:"cache_creation_input_token_cost_above_1hr"` + CacheReadInputTokenCost *float64 `json:"cache_read_input_token_cost"` + LiteLLMProvider string `json:"litellm_provider"` + Mode string `json:"mode"` + SupportsPromptCaching bool `json:"supports_prompt_caching"` + OutputCostPerImage *float64 `json:"output_cost_per_image"` } // PricingService 动态价格服务 @@ -318,6 +320,9 @@ func (s *PricingService) parsePricingData(body []byte) (map[string]*LiteLLMModel if entry.CacheCreationInputTokenCost != nil { pricing.CacheCreationInputTokenCost = *entry.CacheCreationInputTokenCost } + if entry.CacheCreationInputTokenCostAbove1hr != nil { + pricing.CacheCreationInputTokenCostAbove1hr = *entry.CacheCreationInputTokenCostAbove1hr + } if entry.CacheReadInputTokenCost != nil { pricing.CacheReadInputTokenCost = *entry.CacheReadInputTokenCost } diff --git a/backend/internal/service/usage_log.go b/backend/internal/service/usage_log.go index a9721d7f..7b7924da 100644 --- a/backend/internal/service/usage_log.go +++ b/backend/internal/service/usage_log.go @@ -26,8 +26,8 @@ type UsageLog struct { CacheCreationTokens int CacheReadTokens int - CacheCreation5mTokens int - CacheCreation1hTokens int + CacheCreation5mTokens int `gorm:"column:cache_creation_5m_tokens"` + CacheCreation1hTokens int `gorm:"column:cache_creation_1h_tokens"` InputCost float64 OutputCost float64 diff --git a/backend/migrations/054_drop_legacy_cache_columns.sql b/backend/migrations/054_drop_legacy_cache_columns.sql new file mode 100644 index 00000000..ac73cd28 --- /dev/null +++ b/backend/migrations/054_drop_legacy_cache_columns.sql @@ -0,0 +1,14 @@ +-- Drop legacy cache token columns that lack the underscore separator. +-- These were created by GORM's automatic snake_case conversion: +-- CacheCreation5mTokens → cache_creation5m_tokens (incorrect) +-- CacheCreation1hTokens → cache_creation1h_tokens (incorrect) +-- +-- The canonical columns are: +-- cache_creation_5m_tokens (defined in 001_init.sql) +-- cache_creation_1h_tokens (defined in 001_init.sql) +-- +-- Migration 009 already copied data from legacy → canonical columns. +-- This migration drops the legacy columns to avoid confusion. + +ALTER TABLE usage_logs DROP COLUMN IF EXISTS cache_creation5m_tokens; +ALTER TABLE usage_logs DROP COLUMN IF EXISTS cache_creation1h_tokens; diff --git a/frontend/src/components/admin/usage/UsageTable.vue b/frontend/src/components/admin/usage/UsageTable.vue index fbb1942a..2c39807e 100644 --- a/frontend/src/components/admin/usage/UsageTable.vue +++ b/frontend/src/components/admin/usage/UsageTable.vue @@ -70,6 +70,7 @@