From ff29900f3068368b39a7b011ff17755c6a3b8e28 Mon Sep 17 00:00:00 2001 From: wenyifan Date: Fri, 20 Mar 2026 16:10:18 +0800 Subject: [PATCH 1/3] feat: Add support for counting cache-hit tokens in llama.cpp OpenAI-Compatible API --- relay/channel/openai/relay-openai.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go index a4de1611..02387fe0 100644 --- a/relay/channel/openai/relay-openai.go +++ b/relay/channel/openai/relay-openai.go @@ -627,6 +627,12 @@ func applyUsagePostProcessing(info *relaycommon.RelayInfo, usage *dto.Usage, res usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens } } + case constant.ChannelTypeOpenAI: + if usage.PromptTokensDetails.CachedTokens == 0 { + if cachedTokens, ok := extractLlamaCachedTokensFromBody(responseBody); ok { + usage.PromptTokensDetails.CachedTokens = cachedTokens + } + } } } @@ -689,3 +695,21 @@ func extractMoonshotCachedTokensFromBody(body []byte) (int, bool) { return 0, false } + +// extractLlamaCachedTokensFromBody extracts cache_n from the non-standard location used by llama.cpp +func extractLlamaCachedTokensFromBody(body []byte) (int, bool) { + if len(body) == 0 { + return 0, false + } + + var payload struct { + Usage struct { + CachedTokens *int `json:"cache_n"` + } `json:"timings"` + } + + if err := common.Unmarshal(body, &payload); err != nil { + return 0, false + } + return *payload.Usage.CachedTokens, true +} From 498199b37ddd1e29736b0e41b1ac79d73c049a27 Mon Sep 17 00:00:00 2001 From: wenyifan Date: Fri, 20 Mar 2026 16:38:48 +0800 Subject: [PATCH 2/3] fix: nil-check cache_n before dereferencing --- relay/channel/openai/relay-openai.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go index 02387fe0..222bb28f 100644 --- a/relay/channel/openai/relay-openai.go +++ b/relay/channel/openai/relay-openai.go @@ -703,13 +703,19 @@ func 
extractLlamaCachedTokensFromBody(body []byte) (int, bool) { } var payload struct { - Usage struct { - CachedTokens *int `json:"cache_n"` + Timings struct { + Usage struct { + CachedTokens *int `json:"cache_n"` + } `json:"usage"` } `json:"timings"` } if err := common.Unmarshal(body, &payload); err != nil { return 0, false } - return *payload.Usage.CachedTokens, true + + if payload.Timings.Usage.CachedTokens == nil { + return 0, false + } + return *payload.Timings.Usage.CachedTokens, true } From 2c3ae32c8e7804484477d1f0b26bdee8fdc14467 Mon Sep 17 00:00:00 2001 From: wenyifan Date: Fri, 20 Mar 2026 16:48:04 +0800 Subject: [PATCH 3/3] fix: read cache_n directly from timings --- relay/channel/openai/relay-openai.go | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go index 222bb28f..9ef2c490 100644 --- a/relay/channel/openai/relay-openai.go +++ b/relay/channel/openai/relay-openai.go @@ -704,9 +704,7 @@ func extractLlamaCachedTokensFromBody(body []byte) (int, bool) { var payload struct { Timings struct { - Usage struct { - CachedTokens *int `json:"cache_n"` - } `json:"usage"` + CachedTokens *int `json:"cache_n"` } `json:"timings"` } @@ -714,8 +712,8 @@ func extractLlamaCachedTokensFromBody(body []byte) (int, bool) { return 0, false } - if payload.Timings.Usage.CachedTokens == nil { + if payload.Timings.CachedTokens == nil { return 0, false } - return *payload.Timings.Usage.CachedTokens, true + return *payload.Timings.CachedTokens, true }