fix(兼容): 将 Kimi cached_tokens 映射到 Claude 标准 cache_read_input_tokens
Kimi 等 Claude 兼容 API 返回缓存信息使用 OpenAI 风格的 cached_tokens 字段, 而非 Claude 标准的 cache_read_input_tokens,导致客户端收不到缓存命中信息且 内部计费缓存折扣为 0。 新增 reconcileCachedTokens 辅助函数,在 cache_read_input_tokens == 0 且 cached_tokens > 0 时自动填充,覆盖流式(message_start/message_delta)和 非流式两种响应路径。对 Claude 原生上游无影响。 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4176,6 +4176,20 @@ func (s *GatewayService) handleStreamingResponse(ctx context.Context, resp *http
|
||||
eventName = eventType
|
||||
}
|
||||
|
||||
// 兼容 Kimi cached_tokens → cache_read_input_tokens
|
||||
if eventType == "message_start" {
|
||||
if msg, ok := event["message"].(map[string]any); ok {
|
||||
if u, ok := msg["usage"].(map[string]any); ok {
|
||||
reconcileCachedTokens(u)
|
||||
}
|
||||
}
|
||||
}
|
||||
if eventType == "message_delta" {
|
||||
if u, ok := event["usage"].(map[string]any); ok {
|
||||
reconcileCachedTokens(u)
|
||||
}
|
||||
}
|
||||
|
||||
if needModelReplace {
|
||||
if msg, ok := event["message"].(map[string]any); ok {
|
||||
if model, ok := msg["model"].(string); ok && model == mappedModel {
|
||||
@@ -4526,6 +4540,17 @@ func (s *GatewayService) handleNonStreamingResponse(ctx context.Context, resp *h
|
||||
return nil, fmt.Errorf("parse response: %w", err)
|
||||
}
|
||||
|
||||
// 兼容 Kimi cached_tokens → cache_read_input_tokens
|
||||
if response.Usage.CacheReadInputTokens == 0 {
|
||||
cachedTokens := gjson.GetBytes(body, "usage.cached_tokens").Int()
|
||||
if cachedTokens > 0 {
|
||||
response.Usage.CacheReadInputTokens = int(cachedTokens)
|
||||
if newBody, err := sjson.SetBytes(body, "usage.cache_read_input_tokens", cachedTokens); err == nil {
|
||||
body = newBody
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 如果有模型映射,替换响应中的model字段
|
||||
if originalModel != mappedModel {
|
||||
body = s.replaceModelInResponseBody(body, mappedModel, originalModel)
|
||||
@@ -5311,3 +5336,21 @@ func (s *GatewayService) GetAvailableModels(ctx context.Context, groupID *int64,
|
||||
|
||||
return models
|
||||
}
|
||||
|
||||
// reconcileCachedTokens 兼容 Kimi 等上游:
|
||||
// 将 OpenAI 风格的 cached_tokens 映射到 Claude 标准的 cache_read_input_tokens
|
||||
func reconcileCachedTokens(usage map[string]any) bool {
|
||||
if usage == nil {
|
||||
return false
|
||||
}
|
||||
cacheRead, _ := usage["cache_read_input_tokens"].(float64)
|
||||
if cacheRead > 0 {
|
||||
return false // 已有标准字段,无需处理
|
||||
}
|
||||
cached, _ := usage["cached_tokens"].(float64)
|
||||
if cached <= 0 {
|
||||
return false
|
||||
}
|
||||
usage["cache_read_input_tokens"] = cached
|
||||
return true
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user