fix: claude affinity cache counter (#2980)

* fix: claude affinity cache counter

* fix: claude affinity cache counter

* fix: stabilize cache usage stats format and simplify modal rendering
This commit is contained in:
Seefs
2026-02-22 23:30:02 +08:00
committed by GitHub
parent 183c750e59
commit 8cfc2b4398
7 changed files with 304 additions and 53 deletions

View File

@@ -152,7 +152,8 @@ type RelayInfo struct {
// RequestConversionChain records request format conversions in order, e.g. // RequestConversionChain records request format conversions in order, e.g.
// ["openai", "openai_responses"] or ["openai", "claude"]. // ["openai", "openai_responses"] or ["openai", "claude"].
RequestConversionChain []types.RelayFormat RequestConversionChain []types.RelayFormat
// 最终请求到上游的格式 TODO: 当前仅设置了Claude // 最终请求到上游的格式。可由 adaptor 显式设置;
// 若为空,调用 GetFinalRequestRelayFormat 会回退到 RequestConversionChain 的最后一项或 RelayFormat。
FinalRequestRelayFormat types.RelayFormat FinalRequestRelayFormat types.RelayFormat
ThinkingContentInfo ThinkingContentInfo
@@ -579,6 +580,19 @@ func (info *RelayInfo) AppendRequestConversion(format types.RelayFormat) {
info.RequestConversionChain = append(info.RequestConversionChain, format) info.RequestConversionChain = append(info.RequestConversionChain, format)
} }
// GetFinalRequestRelayFormat resolves the relay format of the request that is
// finally sent upstream.
//
// Resolution order:
//  1. FinalRequestRelayFormat, when it has been set explicitly;
//  2. the last entry of RequestConversionChain, when the chain is non-empty;
//  3. RelayFormat otherwise.
//
// It is safe to call on a nil receiver, in which case the empty format is returned.
func (info *RelayInfo) GetFinalRequestRelayFormat() types.RelayFormat {
	// Cases are evaluated top to bottom, so the nil check guards every
	// field access in the cases that follow.
	switch {
	case info == nil:
		return ""
	case info.FinalRequestRelayFormat != "":
		return info.FinalRequestRelayFormat
	case len(info.RequestConversionChain) > 0:
		chain := info.RequestConversionChain
		return chain[len(chain)-1]
	default:
		return info.RelayFormat
	}
}
func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo { func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo {
info := genBaseRelayInfo(c, request) info := genBaseRelayInfo(c, request)
if info.RelayMode == relayconstant.RelayModeUnknown { if info.RelayMode == relayconstant.RelayModeUnknown {

View File

@@ -0,0 +1,40 @@
package common
import (
"testing"
"github.com/QuantumNous/new-api/types"
"github.com/stretchr/testify/require"
)
func TestRelayInfoGetFinalRequestRelayFormatPrefersExplicitFinal(t *testing.T) {
info := &RelayInfo{
RelayFormat: types.RelayFormatOpenAI,
RequestConversionChain: []types.RelayFormat{types.RelayFormatOpenAI, types.RelayFormatClaude},
FinalRequestRelayFormat: types.RelayFormatOpenAIResponses,
}
require.Equal(t, types.RelayFormat(types.RelayFormatOpenAIResponses), info.GetFinalRequestRelayFormat())
}
func TestRelayInfoGetFinalRequestRelayFormatFallsBackToConversionChain(t *testing.T) {
info := &RelayInfo{
RelayFormat: types.RelayFormatOpenAI,
RequestConversionChain: []types.RelayFormat{types.RelayFormatOpenAI, types.RelayFormatClaude},
}
require.Equal(t, types.RelayFormat(types.RelayFormatClaude), info.GetFinalRequestRelayFormat())
}
// TestRelayInfoGetFinalRequestRelayFormatFallsBackToRelayFormat checks that
// RelayFormat is returned when neither an explicit final format nor a
// conversion chain is present.
func TestRelayInfoGetFinalRequestRelayFormatFallsBackToRelayFormat(t *testing.T) {
	info := &RelayInfo{RelayFormat: types.RelayFormatGemini}

	want := types.RelayFormat(types.RelayFormatGemini)
	got := info.GetFinalRequestRelayFormat()
	require.Equal(t, want, got)
}
// TestRelayInfoGetFinalRequestRelayFormatNilReceiver checks that calling the
// getter on a nil *RelayInfo yields the empty format.
func TestRelayInfoGetFinalRequestRelayFormatNilReceiver(t *testing.T) {
	var info *RelayInfo

	got := info.GetFinalRequestRelayFormat()
	require.Equal(t, types.RelayFormat(""), got)
}

View File

@@ -232,7 +232,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
} }
if originUsage != nil { if originUsage != nil {
service.ObserveChannelAffinityUsageCacheFromContext(ctx, usage) service.ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
} }
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason) adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
@@ -336,7 +336,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
var audioInputQuota decimal.Decimal var audioInputQuota decimal.Decimal
var audioInputPrice float64 var audioInputPrice float64
isClaudeUsageSemantic := relayInfo.FinalRequestRelayFormat == types.RelayFormatClaude isClaudeUsageSemantic := relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude
if !relayInfo.PriceData.UsePrice { if !relayInfo.PriceData.UsePrice {
baseTokens := dPromptTokens baseTokens := dPromptTokens
// 减去 cached tokens // 减去 cached tokens

View File

@@ -13,6 +13,7 @@ import (
"github.com/QuantumNous/new-api/dto" "github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/pkg/cachex" "github.com/QuantumNous/new-api/pkg/cachex"
"github.com/QuantumNous/new-api/setting/operation_setting" "github.com/QuantumNous/new-api/setting/operation_setting"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/samber/hot" "github.com/samber/hot"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
@@ -61,6 +62,12 @@ type ChannelAffinityStatsContext struct {
TTLSeconds int64 TTLSeconds int64
} }
// Cached-token rate modes. A mode names the denominator a consumer should use
// when turning the cached token count of a stats entry into a percentage.
const (
// cached / prompt. Selected for the OpenAI, OpenAI Responses and OpenAI
// Responses compaction relay formats.
cacheTokenRateModeCachedOverPrompt = "cached_over_prompt"
// cached / (prompt + cached). Selected for the Claude relay format.
cacheTokenRateModeCachedOverPromptPlusCached = "cached_over_prompt_plus_cached"
// Recorded once a single stats entry has observed two different modes.
cacheTokenRateModeMixed = "mixed"
)
type ChannelAffinityCacheStats struct { type ChannelAffinityCacheStats struct {
Enabled bool `json:"enabled"` Enabled bool `json:"enabled"`
Total int `json:"total"` Total int `json:"total"`
@@ -565,9 +572,10 @@ func RecordChannelAffinity(c *gin.Context, channelID int) {
} }
type ChannelAffinityUsageCacheStats struct { type ChannelAffinityUsageCacheStats struct {
RuleName string `json:"rule_name"` RuleName string `json:"rule_name"`
UsingGroup string `json:"using_group"` UsingGroup string `json:"using_group"`
KeyFingerprint string `json:"key_fp"` KeyFingerprint string `json:"key_fp"`
CachedTokenRateMode string `json:"cached_token_rate_mode"`
Hit int64 `json:"hit"` Hit int64 `json:"hit"`
Total int64 `json:"total"` Total int64 `json:"total"`
@@ -582,6 +590,8 @@ type ChannelAffinityUsageCacheStats struct {
} }
type ChannelAffinityUsageCacheCounters struct { type ChannelAffinityUsageCacheCounters struct {
CachedTokenRateMode string `json:"cached_token_rate_mode"`
Hit int64 `json:"hit"` Hit int64 `json:"hit"`
Total int64 `json:"total"` Total int64 `json:"total"`
WindowSeconds int64 `json:"window_seconds"` WindowSeconds int64 `json:"window_seconds"`
@@ -596,12 +606,17 @@ type ChannelAffinityUsageCacheCounters struct {
var channelAffinityUsageCacheStatsLocks [64]sync.Mutex var channelAffinityUsageCacheStatsLocks [64]sync.Mutex
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage) { // ObserveChannelAffinityUsageCacheByRelayFormat records usage cache stats with a stable rate mode derived from relay format.
// ObserveChannelAffinityUsageCacheByRelayFormat maps relayFormat to its
// cached-token rate mode and forwards the observation to
// ObserveChannelAffinityUsageCacheFromContext.
func ObserveChannelAffinityUsageCacheByRelayFormat(c *gin.Context, usage *dto.Usage, relayFormat types.RelayFormat) {
	rateMode := cachedTokenRateModeByRelayFormat(relayFormat)
	ObserveChannelAffinityUsageCacheFromContext(c, usage, rateMode)
}
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage, cachedTokenRateMode string) {
statsCtx, ok := GetChannelAffinityStatsContext(c) statsCtx, ok := GetChannelAffinityStatsContext(c)
if !ok { if !ok {
return return
} }
observeChannelAffinityUsageCache(statsCtx, usage) observeChannelAffinityUsageCache(statsCtx, usage, cachedTokenRateMode)
} }
func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats { func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats {
@@ -628,6 +643,7 @@ func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) Chann
} }
} }
return ChannelAffinityUsageCacheStats{ return ChannelAffinityUsageCacheStats{
CachedTokenRateMode: v.CachedTokenRateMode,
RuleName: ruleName, RuleName: ruleName,
UsingGroup: usingGroup, UsingGroup: usingGroup,
KeyFingerprint: keyFp, KeyFingerprint: keyFp,
@@ -643,7 +659,7 @@ func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) Chann
} }
} }
func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage) { func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage, cachedTokenRateMode string) {
entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint) entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint)
if entryKey == "" { if entryKey == "" {
return return
@@ -669,6 +685,14 @@ func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usag
if !found { if !found {
next = ChannelAffinityUsageCacheCounters{} next = ChannelAffinityUsageCacheCounters{}
} }
currentMode := normalizeCachedTokenRateMode(cachedTokenRateMode)
if currentMode != "" {
if next.CachedTokenRateMode == "" {
next.CachedTokenRateMode = currentMode
} else if next.CachedTokenRateMode != currentMode && next.CachedTokenRateMode != cacheTokenRateModeMixed {
next.CachedTokenRateMode = cacheTokenRateModeMixed
}
}
next.Total++ next.Total++
hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage) hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage)
if hit { if hit {
@@ -684,6 +708,30 @@ func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usag
_ = cache.SetWithTTL(entryKey, next, ttl) _ = cache.SetWithTTL(entryKey, next, ttl)
} }
// normalizeCachedTokenRateMode returns mode unchanged when it is one of the
// known cached-token rate modes, and the empty string for anything else.
func normalizeCachedTokenRateMode(mode string) string {
	switch mode {
	case cacheTokenRateModeCachedOverPrompt,
		cacheTokenRateModeCachedOverPromptPlusCached,
		cacheTokenRateModeMixed:
		// A matched value is, by definition, equal to the constant it matched.
		return mode
	}
	return ""
}
// cachedTokenRateModeByRelayFormat picks the cached-token rate mode for a relay
// format: Claude uses cached/(prompt+cached), the OpenAI family uses
// cached/prompt, and every other format yields the empty string.
func cachedTokenRateModeByRelayFormat(relayFormat types.RelayFormat) string {
	if relayFormat == types.RelayFormatClaude {
		return cacheTokenRateModeCachedOverPromptPlusCached
	}

	isOpenAIFamily := relayFormat == types.RelayFormatOpenAI ||
		relayFormat == types.RelayFormatOpenAIResponses ||
		relayFormat == types.RelayFormatOpenAIResponsesCompaction
	if isOpenAIFamily {
		return cacheTokenRateModeCachedOverPrompt
	}

	return ""
}
func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string { func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string {
ruleName = strings.TrimSpace(ruleName) ruleName = strings.TrimSpace(ruleName)
usingGroup = strings.TrimSpace(usingGroup) usingGroup = strings.TrimSpace(usingGroup)

View File

@@ -0,0 +1,105 @@
package service
import (
"fmt"
"net/http/httptest"
"testing"
"time"
"github.com/QuantumNous/new-api/dto"
"github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin"
"github.com/stretchr/testify/require"
)
// buildChannelAffinityStatsContextForTest returns a gin test context that
// carries channel affinity metadata for the given rule, group and key
// fingerprint, with a 600 second TTL.
func buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP string) *gin.Context {
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

	meta := channelAffinityMeta{
		CacheKey:       fmt.Sprintf("test:%s:%s:%s", ruleName, usingGroup, keyFP),
		TTLSeconds:     600,
		RuleName:       ruleName,
		UsingGroup:     usingGroup,
		KeyFingerprint: keyFP,
	}
	setChannelAffinityContext(ctx, meta)

	return ctx
}
// TestObserveChannelAffinityUsageCacheByRelayFormat_ClaudeMode checks that a
// single Claude-format observation records its counters and the
// cached/(prompt+cached) rate mode.
func TestObserveChannelAffinityUsageCacheByRelayFormat_ClaudeMode(t *testing.T) {
	// Timestamp-based names keep this test's stats entry separate from other tests.
	ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
	usingGroup := "default"
	keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
	ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)

	usage := &dto.Usage{PromptTokens: 100, CompletionTokens: 40, TotalTokens: 140}
	usage.PromptTokensDetails = dto.InputTokenDetails{CachedTokens: 30}

	ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, types.RelayFormatClaude)

	got := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
	require.EqualValues(t, 1, got.Total)
	require.EqualValues(t, 1, got.Hit)
	require.EqualValues(t, 100, got.PromptTokens)
	require.EqualValues(t, 40, got.CompletionTokens)
	require.EqualValues(t, 140, got.TotalTokens)
	require.EqualValues(t, 30, got.CachedTokens)
	require.Equal(t, cacheTokenRateModeCachedOverPromptPlusCached, got.CachedTokenRateMode)
}
// TestObserveChannelAffinityUsageCacheByRelayFormat_MixedMode checks that an
// OpenAI-format observation followed by a Claude-format observation on the same
// entry accumulates the counters and switches the rate mode to "mixed".
func TestObserveChannelAffinityUsageCacheByRelayFormat_MixedMode(t *testing.T) {
	// Timestamp-based names keep this test's stats entry separate from other tests.
	ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
	usingGroup := "default"
	keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
	ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)

	// Observations are applied in slice order: OpenAI first, then Claude.
	observations := []struct {
		usage  *dto.Usage
		format types.RelayFormat
	}{
		{
			usage: &dto.Usage{
				PromptTokens:        100,
				PromptTokensDetails: dto.InputTokenDetails{CachedTokens: 10},
			},
			format: types.RelayFormatOpenAI,
		},
		{
			usage: &dto.Usage{
				PromptTokens:        80,
				PromptTokensDetails: dto.InputTokenDetails{CachedTokens: 20},
			},
			format: types.RelayFormatClaude,
		},
	}
	for _, obs := range observations {
		ObserveChannelAffinityUsageCacheByRelayFormat(ctx, obs.usage, obs.format)
	}

	got := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
	require.EqualValues(t, 2, got.Total)
	require.EqualValues(t, 2, got.Hit)
	require.EqualValues(t, 180, got.PromptTokens)
	require.EqualValues(t, 30, got.CachedTokens)
	require.Equal(t, cacheTokenRateModeMixed, got.CachedTokenRateMode)
}
// TestObserveChannelAffinityUsageCacheByRelayFormat_UnsupportedModeKeepsEmpty
// checks that a relay format without a rate mode (Gemini here) still updates
// the counters while leaving CachedTokenRateMode empty.
func TestObserveChannelAffinityUsageCacheByRelayFormat_UnsupportedModeKeepsEmpty(t *testing.T) {
	// Timestamp-based names keep this test's stats entry separate from other tests.
	ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
	usingGroup := "default"
	keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
	ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)

	usage := &dto.Usage{PromptTokens: 100}
	usage.PromptTokensDetails = dto.InputTokenDetails{CachedTokens: 25}

	ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, types.RelayFormatGemini)

	got := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
	require.EqualValues(t, 1, got.Total)
	require.EqualValues(t, 1, got.Hit)
	require.EqualValues(t, 25, got.CachedTokens)
	require.Equal(t, "", got.CachedTokenRateMode)
}

View File

@@ -236,6 +236,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
} }
func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) { func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) {
if usage != nil {
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
}
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix() useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
promptTokens := usage.PromptTokens promptTokens := usage.PromptTokens

View File

@@ -39,6 +39,21 @@ function formatTokenRate(n, d) {
return `${r.toFixed(2)}%`; return `${r.toFixed(2)}%`;
} }
/**
 * Format the cached-token share as a percentage string, using the denominator
 * named by `mode`.
 *
 * - 'cached_over_prompt': cached / prompt
 * - 'cached_over_prompt_plus_cached': cached / (prompt + cached)
 * - any other mode: returns '-'
 *
 * @param {number} cachedTokens cached token count
 * @param {number} promptTokens prompt token count
 * @param {string} mode cached-token rate mode
 * @returns {string} formatted rate, or '-' when the mode has no defined ratio
 */
function formatCachedTokenRate(cachedTokens, promptTokens, mode) {
  switch (mode) {
    case 'cached_over_prompt_plus_cached': {
      const prompt = Number(promptTokens || 0);
      const cached = Number(cachedTokens || 0);
      return formatTokenRate(cachedTokens, prompt + cached);
    }
    case 'cached_over_prompt':
      return formatTokenRate(cachedTokens, promptTokens);
    default:
      return '-';
  }
}
/**
 * Report whether `value` is a string containing at least one
 * non-whitespace character.
 *
 * @param {*} value candidate value
 * @returns {boolean} true only for non-blank strings
 */
function hasTextValue(value) {
  if (typeof value !== 'string') {
    return false;
  }
  return value.trim().length > 0;
}
const ChannelAffinityUsageCacheModal = ({ const ChannelAffinityUsageCacheModal = ({
t, t,
showChannelAffinityUsageCacheModal, showChannelAffinityUsageCacheModal,
@@ -107,7 +122,7 @@ const ChannelAffinityUsageCacheModal = ({
t, t,
]); ]);
const rows = useMemo(() => { const { rows, supportsTokenStats } = useMemo(() => {
const s = stats || {}; const s = stats || {};
const hit = Number(s.hit || 0); const hit = Number(s.hit || 0);
const total = Number(s.total || 0); const total = Number(s.total || 0);
@@ -118,48 +133,62 @@ const ChannelAffinityUsageCacheModal = ({
const totalTokens = Number(s.total_tokens || 0); const totalTokens = Number(s.total_tokens || 0);
const cachedTokens = Number(s.cached_tokens || 0); const cachedTokens = Number(s.cached_tokens || 0);
const promptCacheHitTokens = Number(s.prompt_cache_hit_tokens || 0); const promptCacheHitTokens = Number(s.prompt_cache_hit_tokens || 0);
const cachedTokenRateMode = String(s.cached_token_rate_mode || '').trim();
const supportsTokenStats =
cachedTokenRateMode === 'cached_over_prompt' ||
cachedTokenRateMode === 'cached_over_prompt_plus_cached' ||
cachedTokenRateMode === 'mixed';
return [ const data = [];
{ key: t('规则'), value: s.rule_name || params.rule_name || '-' }, const ruleName = String(s.rule_name || params.rule_name || '').trim();
{ key: t('分组'), value: s.using_group || params.using_group || '-' }, const usingGroup = String(s.using_group || params.using_group || '').trim();
{ const keyHint = String(params.key_hint || '').trim();
key: t('Key 摘要'), const keyFp = String(s.key_fp || params.key_fp || '').trim();
value: params.key_hint || '-',
}, if (hasTextValue(ruleName)) {
{ data.push({ key: t('规则'), value: ruleName });
key: t('Key 指纹'), }
value: s.key_fp || params.key_fp || '-', if (hasTextValue(usingGroup)) {
}, data.push({ key: t('分组'), value: usingGroup });
{ key: t('TTL'), value: windowSeconds > 0 ? windowSeconds : '-' }, }
{ if (hasTextValue(keyHint)) {
key: t('命中率'), data.push({ key: t('Key 摘要'), value: keyHint });
value: `${hit}/${total} (${formatRate(hit, total)})`, }
}, if (hasTextValue(keyFp)) {
{ data.push({ key: t('Key 指纹'), value: keyFp });
key: t('Prompt tokens'), }
value: promptTokens, if (windowSeconds > 0) {
}, data.push({ key: t('TTL'), value: windowSeconds });
{ }
key: t('Cached tokens'), if (total > 0) {
value: `${cachedTokens} (${formatTokenRate(cachedTokens, promptTokens)})`, data.push({ key: t('命中率'), value: `${hit}/${total} (${formatRate(hit, total)})` });
}, }
{ if (lastSeenAt > 0) {
key: t('Prompt cache hit tokens'), data.push({ key: t('最近一次'), value: timestamp2string(lastSeenAt) });
value: promptCacheHitTokens, }
},
{ if (supportsTokenStats) {
key: t('Completion tokens'), if (promptTokens > 0) {
value: completionTokens, data.push({ key: t('Prompt tokens'), value: promptTokens });
}, }
{ if (promptTokens > 0 || cachedTokens > 0) {
key: t('Total tokens'), data.push({
value: totalTokens, key: t('Cached tokens'),
}, value: `${cachedTokens} (${formatCachedTokenRate(cachedTokens, promptTokens, cachedTokenRateMode)})`,
{ });
key: t('最近一次'), }
value: lastSeenAt > 0 ? timestamp2string(lastSeenAt) : '-', if (promptCacheHitTokens > 0) {
}, data.push({ key: t('Prompt cache hit tokens'), value: promptCacheHitTokens });
]; }
if (completionTokens > 0) {
data.push({ key: t('Completion tokens'), value: completionTokens });
}
if (totalTokens > 0) {
data.push({ key: t('Total tokens'), value: totalTokens });
}
}
return { rows: data, supportsTokenStats };
}, [stats, params, t]); }, [stats, params, t]);
return ( return (
@@ -179,15 +208,27 @@ const ChannelAffinityUsageCacheModal = ({
{t( {t(
'命中判定usage 中存在 cached tokens例如 cached_tokens/prompt_cache_hit_tokens即视为命中。', '命中判定usage 中存在 cached tokens例如 cached_tokens/prompt_cache_hit_tokens即视为命中。',
)} )}
{' '}
{t(
'Cached tokens 占比口径由后端返回Claude 语义按 cached/(prompt+cached),其余按 cached/prompt。',
)}
{' '}
{t('当前仅 OpenAI / Claude 语义支持缓存 token 统计,其他通道将隐藏 token 相关字段。')}
{stats && !supportsTokenStats ? (
<>
{' '}
{t('该记录不包含可用的 token 统计口径。')}
</>
) : null}
</Text> </Text>
</div> </div>
<Spin spinning={loading} tip={t('加载中...')}> <Spin spinning={loading} tip={t('加载中...')}>
{stats ? ( {stats && rows.length > 0 ? (
<Descriptions data={rows} /> <Descriptions data={rows} />
) : ( ) : (
<div style={{ padding: '24px 0' }}> <div style={{ padding: '24px 0' }}>
<Text type='tertiary' size='small'> <Text type='tertiary' size='small'>
{loading ? t('加载中...') : t('暂无数据')} {loading ? t('加载中...') : t('暂无可展示数据')}
</Text> </Text>
</div> </div>
)} )}