fix: claude affinity cache counter (#2980)
* fix: claude affinity cache counter
* fix: claude affinity cache counter
* fix: stabilize cache usage stats format and simplify modal rendering
This commit is contained in:
@@ -13,6 +13,7 @@ import (
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
"github.com/QuantumNous/new-api/pkg/cachex"
|
||||
"github.com/QuantumNous/new-api/setting/operation_setting"
|
||||
"github.com/QuantumNous/new-api/types"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/samber/hot"
|
||||
"github.com/tidwall/gjson"
|
||||
@@ -61,6 +62,12 @@ type ChannelAffinityStatsContext struct {
|
||||
TTLSeconds int64
|
||||
}
|
||||
|
||||
// Cached-token rate modes stored on usage cache counters and reported through
// the "cached_token_rate_mode" JSON field.
//
// NOTE(review): judging by the names, each mode identifies the denominator of
// the cached-token ratio (prompt tokens alone vs. prompt plus cached tokens);
// confirm against the consumer of these stats.
const (
	// cacheTokenRateModeCachedOverPrompt is the mode selected for the OpenAI
	// relay formats.
	cacheTokenRateModeCachedOverPrompt = "cached_over_prompt"

	// cacheTokenRateModeCachedOverPromptPlusCached is the mode selected for the
	// Claude relay format.
	cacheTokenRateModeCachedOverPromptPlusCached = "cached_over_prompt_plus_cached"

	// cacheTokenRateModeMixed marks an entry that has been observed with more
	// than one distinct mode.
	cacheTokenRateModeMixed = "mixed"
)
|
||||
|
||||
type ChannelAffinityCacheStats struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
Total int `json:"total"`
|
||||
@@ -565,9 +572,10 @@ func RecordChannelAffinity(c *gin.Context, channelID int) {
|
||||
}
|
||||
|
||||
type ChannelAffinityUsageCacheStats struct {
|
||||
RuleName string `json:"rule_name"`
|
||||
UsingGroup string `json:"using_group"`
|
||||
KeyFingerprint string `json:"key_fp"`
|
||||
RuleName string `json:"rule_name"`
|
||||
UsingGroup string `json:"using_group"`
|
||||
KeyFingerprint string `json:"key_fp"`
|
||||
CachedTokenRateMode string `json:"cached_token_rate_mode"`
|
||||
|
||||
Hit int64 `json:"hit"`
|
||||
Total int64 `json:"total"`
|
||||
@@ -582,6 +590,8 @@ type ChannelAffinityUsageCacheStats struct {
|
||||
}
|
||||
|
||||
type ChannelAffinityUsageCacheCounters struct {
|
||||
CachedTokenRateMode string `json:"cached_token_rate_mode"`
|
||||
|
||||
Hit int64 `json:"hit"`
|
||||
Total int64 `json:"total"`
|
||||
WindowSeconds int64 `json:"window_seconds"`
|
||||
@@ -596,12 +606,17 @@ type ChannelAffinityUsageCacheCounters struct {
|
||||
|
||||
var channelAffinityUsageCacheStatsLocks [64]sync.Mutex
|
||||
|
||||
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage) {
|
||||
// ObserveChannelAffinityUsageCacheByRelayFormat records usage cache stats with a stable rate mode derived from relay format.
|
||||
func ObserveChannelAffinityUsageCacheByRelayFormat(c *gin.Context, usage *dto.Usage, relayFormat types.RelayFormat) {
|
||||
ObserveChannelAffinityUsageCacheFromContext(c, usage, cachedTokenRateModeByRelayFormat(relayFormat))
|
||||
}
|
||||
|
||||
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage, cachedTokenRateMode string) {
|
||||
statsCtx, ok := GetChannelAffinityStatsContext(c)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
observeChannelAffinityUsageCache(statsCtx, usage)
|
||||
observeChannelAffinityUsageCache(statsCtx, usage, cachedTokenRateMode)
|
||||
}
|
||||
|
||||
func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats {
|
||||
@@ -628,6 +643,7 @@ func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) Chann
|
||||
}
|
||||
}
|
||||
return ChannelAffinityUsageCacheStats{
|
||||
CachedTokenRateMode: v.CachedTokenRateMode,
|
||||
RuleName: ruleName,
|
||||
UsingGroup: usingGroup,
|
||||
KeyFingerprint: keyFp,
|
||||
@@ -643,7 +659,7 @@ func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) Chann
|
||||
}
|
||||
}
|
||||
|
||||
func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage) {
|
||||
func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage, cachedTokenRateMode string) {
|
||||
entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint)
|
||||
if entryKey == "" {
|
||||
return
|
||||
@@ -669,6 +685,14 @@ func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usag
|
||||
if !found {
|
||||
next = ChannelAffinityUsageCacheCounters{}
|
||||
}
|
||||
currentMode := normalizeCachedTokenRateMode(cachedTokenRateMode)
|
||||
if currentMode != "" {
|
||||
if next.CachedTokenRateMode == "" {
|
||||
next.CachedTokenRateMode = currentMode
|
||||
} else if next.CachedTokenRateMode != currentMode && next.CachedTokenRateMode != cacheTokenRateModeMixed {
|
||||
next.CachedTokenRateMode = cacheTokenRateModeMixed
|
||||
}
|
||||
}
|
||||
next.Total++
|
||||
hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage)
|
||||
if hit {
|
||||
@@ -684,6 +708,30 @@ func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usag
|
||||
_ = cache.SetWithTTL(entryKey, next, ttl)
|
||||
}
|
||||
|
||||
func normalizeCachedTokenRateMode(mode string) string {
|
||||
switch mode {
|
||||
case cacheTokenRateModeCachedOverPrompt:
|
||||
return cacheTokenRateModeCachedOverPrompt
|
||||
case cacheTokenRateModeCachedOverPromptPlusCached:
|
||||
return cacheTokenRateModeCachedOverPromptPlusCached
|
||||
case cacheTokenRateModeMixed:
|
||||
return cacheTokenRateModeMixed
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func cachedTokenRateModeByRelayFormat(relayFormat types.RelayFormat) string {
|
||||
switch relayFormat {
|
||||
case types.RelayFormatOpenAI, types.RelayFormatOpenAIResponses, types.RelayFormatOpenAIResponsesCompaction:
|
||||
return cacheTokenRateModeCachedOverPrompt
|
||||
case types.RelayFormatClaude:
|
||||
return cacheTokenRateModeCachedOverPromptPlusCached
|
||||
default:
|
||||
return ""
|
||||
}
|
||||
}
|
||||
|
||||
func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string {
|
||||
ruleName = strings.TrimSpace(ruleName)
|
||||
usingGroup = strings.TrimSpace(usingGroup)
|
||||
|
||||
New file: service/channel_affinity_usage_cache_test.go (105 lines)
@@ -0,0 +1,105 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/QuantumNous/new-api/dto"
|
||||
"github.com/QuantumNous/new-api/types"
|
||||
"github.com/gin-gonic/gin"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP string) *gin.Context {
|
||||
rec := httptest.NewRecorder()
|
||||
ctx, _ := gin.CreateTestContext(rec)
|
||||
setChannelAffinityContext(ctx, channelAffinityMeta{
|
||||
CacheKey: fmt.Sprintf("test:%s:%s:%s", ruleName, usingGroup, keyFP),
|
||||
TTLSeconds: 600,
|
||||
RuleName: ruleName,
|
||||
UsingGroup: usingGroup,
|
||||
KeyFingerprint: keyFP,
|
||||
})
|
||||
return ctx
|
||||
}
|
||||
|
||||
func TestObserveChannelAffinityUsageCacheByRelayFormat_ClaudeMode(t *testing.T) {
|
||||
ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
|
||||
usingGroup := "default"
|
||||
keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
|
||||
ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 100,
|
||||
CompletionTokens: 40,
|
||||
TotalTokens: 140,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 30,
|
||||
},
|
||||
}
|
||||
|
||||
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, types.RelayFormatClaude)
|
||||
stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
|
||||
|
||||
require.EqualValues(t, 1, stats.Total)
|
||||
require.EqualValues(t, 1, stats.Hit)
|
||||
require.EqualValues(t, 100, stats.PromptTokens)
|
||||
require.EqualValues(t, 40, stats.CompletionTokens)
|
||||
require.EqualValues(t, 140, stats.TotalTokens)
|
||||
require.EqualValues(t, 30, stats.CachedTokens)
|
||||
require.Equal(t, cacheTokenRateModeCachedOverPromptPlusCached, stats.CachedTokenRateMode)
|
||||
}
|
||||
|
||||
func TestObserveChannelAffinityUsageCacheByRelayFormat_MixedMode(t *testing.T) {
|
||||
ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
|
||||
usingGroup := "default"
|
||||
keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
|
||||
ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
|
||||
|
||||
openAIUsage := &dto.Usage{
|
||||
PromptTokens: 100,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 10,
|
||||
},
|
||||
}
|
||||
claudeUsage := &dto.Usage{
|
||||
PromptTokens: 80,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 20,
|
||||
},
|
||||
}
|
||||
|
||||
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, openAIUsage, types.RelayFormatOpenAI)
|
||||
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, claudeUsage, types.RelayFormatClaude)
|
||||
stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
|
||||
|
||||
require.EqualValues(t, 2, stats.Total)
|
||||
require.EqualValues(t, 2, stats.Hit)
|
||||
require.EqualValues(t, 180, stats.PromptTokens)
|
||||
require.EqualValues(t, 30, stats.CachedTokens)
|
||||
require.Equal(t, cacheTokenRateModeMixed, stats.CachedTokenRateMode)
|
||||
}
|
||||
|
||||
func TestObserveChannelAffinityUsageCacheByRelayFormat_UnsupportedModeKeepsEmpty(t *testing.T) {
|
||||
ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
|
||||
usingGroup := "default"
|
||||
keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
|
||||
ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
|
||||
|
||||
usage := &dto.Usage{
|
||||
PromptTokens: 100,
|
||||
PromptTokensDetails: dto.InputTokenDetails{
|
||||
CachedTokens: 25,
|
||||
},
|
||||
}
|
||||
|
||||
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, types.RelayFormatGemini)
|
||||
stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
|
||||
|
||||
require.EqualValues(t, 1, stats.Total)
|
||||
require.EqualValues(t, 1, stats.Hit)
|
||||
require.EqualValues(t, 25, stats.CachedTokens)
|
||||
require.Equal(t, "", stats.CachedTokenRateMode)
|
||||
}
|
||||
@@ -236,6 +236,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
|
||||
}
|
||||
|
||||
func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) {
|
||||
if usage != nil {
|
||||
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
|
||||
}
|
||||
|
||||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
||||
promptTokens := usage.PromptTokens
|
||||
|
||||
Reference in New Issue
Block a user