diff --git a/backend/internal/pkg/antigravity/gemini_types.go b/backend/internal/pkg/antigravity/gemini_types.go index 8e3e3885..67f6c3e7 100644 --- a/backend/internal/pkg/antigravity/gemini_types.go +++ b/backend/internal/pkg/antigravity/gemini_types.go @@ -143,9 +143,10 @@ type GeminiCandidate struct { // GeminiUsageMetadata Gemini 用量元数据 type GeminiUsageMetadata struct { - PromptTokenCount int `json:"promptTokenCount,omitempty"` - CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"` - TotalTokenCount int `json:"totalTokenCount,omitempty"` + PromptTokenCount int `json:"promptTokenCount,omitempty"` + CandidatesTokenCount int `json:"candidatesTokenCount,omitempty"` + CachedContentTokenCount int `json:"cachedContentTokenCount,omitempty"` + TotalTokenCount int `json:"totalTokenCount,omitempty"` } // DefaultSafetySettings 默认安全设置(关闭所有过滤) diff --git a/backend/internal/pkg/antigravity/response_transformer.go b/backend/internal/pkg/antigravity/response_transformer.go index 799de694..cd7f5f80 100644 --- a/backend/internal/pkg/antigravity/response_transformer.go +++ b/backend/internal/pkg/antigravity/response_transformer.go @@ -232,10 +232,14 @@ func (p *NonStreamingProcessor) buildResponse(geminiResp *GeminiResponse, respon stopReason = "max_tokens" } + // Note: Gemini's promptTokenCount includes cachedContentTokenCount, + // whereas Claude's input_tokens excludes cache_read_input_tokens, so the cached count is subtracted usage := ClaudeUsage{} if geminiResp.UsageMetadata != nil { - usage.InputTokens = geminiResp.UsageMetadata.PromptTokenCount + cached := geminiResp.UsageMetadata.CachedContentTokenCount + usage.InputTokens = geminiResp.UsageMetadata.PromptTokenCount - cached usage.OutputTokens = geminiResp.UsageMetadata.CandidatesTokenCount + usage.CacheReadInputTokens = cached } // 生成响应 ID diff --git a/backend/internal/pkg/antigravity/stream_transformer.go b/backend/internal/pkg/antigravity/stream_transformer.go index c5d954f5..9fe68a11 100644 --- a/backend/internal/pkg/antigravity/stream_transformer.go +++ 
b/backend/internal/pkg/antigravity/stream_transformer.go @@ -29,8 +29,9 @@ type StreamingProcessor struct { originalModel string // 累计 usage - inputTokens int - outputTokens int + inputTokens int + outputTokens int + cacheReadTokens int } // NewStreamingProcessor 创建流式响应处理器 @@ -76,9 +77,13 @@ func (p *StreamingProcessor) ProcessLine(line string) []byte { } // 更新 usage + // Note: Gemini's promptTokenCount includes cachedContentTokenCount, + // whereas Claude's input_tokens excludes cache_read_input_tokens, so the cached count is subtracted if geminiResp.UsageMetadata != nil { - p.inputTokens = geminiResp.UsageMetadata.PromptTokenCount + cached := geminiResp.UsageMetadata.CachedContentTokenCount + p.inputTokens = geminiResp.UsageMetadata.PromptTokenCount - cached p.outputTokens = geminiResp.UsageMetadata.CandidatesTokenCount + p.cacheReadTokens = cached } // 处理 parts @@ -108,8 +113,9 @@ func (p *StreamingProcessor) Finish() ([]byte, *ClaudeUsage) { } usage := &ClaudeUsage{ - InputTokens: p.inputTokens, - OutputTokens: p.outputTokens, + InputTokens: p.inputTokens, + OutputTokens: p.outputTokens, + CacheReadInputTokens: p.cacheReadTokens, } return result.Bytes(), usage @@ -123,8 +129,10 @@ func (p *StreamingProcessor) emitMessageStart(v1Resp *V1InternalResponse) []byte usage := ClaudeUsage{} if v1Resp.Response.UsageMetadata != nil { - usage.InputTokens = v1Resp.Response.UsageMetadata.PromptTokenCount + cached := v1Resp.Response.UsageMetadata.CachedContentTokenCount + usage.InputTokens = v1Resp.Response.UsageMetadata.PromptTokenCount - cached usage.OutputTokens = v1Resp.Response.UsageMetadata.CandidatesTokenCount + usage.CacheReadInputTokens = cached } responseID := v1Resp.ResponseID @@ -418,8 +426,9 @@ func (p *StreamingProcessor) emitFinish(finishReason string) []byte { } usage := ClaudeUsage{ - InputTokens: p.inputTokens, - OutputTokens: p.outputTokens, + InputTokens: p.inputTokens, + OutputTokens: p.outputTokens, + CacheReadInputTokens: p.cacheReadTokens, } deltaEvent := map[string]any{