fix: claude affinity cache counter (#2980)
* fix: claude affinity cache counter
* fix: claude affinity cache counter
* fix: stabilize cache usage stats format and simplify modal rendering
This commit is contained in:
@@ -152,7 +152,8 @@ type RelayInfo struct {
|
|||||||
// RequestConversionChain records request format conversions in order, e.g.
|
// RequestConversionChain records request format conversions in order, e.g.
|
||||||
// ["openai", "openai_responses"] or ["openai", "claude"].
|
// ["openai", "openai_responses"] or ["openai", "claude"].
|
||||||
RequestConversionChain []types.RelayFormat
|
RequestConversionChain []types.RelayFormat
|
||||||
// 最终请求到上游的格式 TODO: 当前仅设置了Claude
|
// 最终请求到上游的格式。可由 adaptor 显式设置;
|
||||||
|
// 若为空,调用 GetFinalRequestRelayFormat 会回退到 RequestConversionChain 的最后一项或 RelayFormat。
|
||||||
FinalRequestRelayFormat types.RelayFormat
|
FinalRequestRelayFormat types.RelayFormat
|
||||||
|
|
||||||
ThinkingContentInfo
|
ThinkingContentInfo
|
||||||
@@ -579,6 +580,19 @@ func (info *RelayInfo) AppendRequestConversion(format types.RelayFormat) {
|
|||||||
info.RequestConversionChain = append(info.RequestConversionChain, format)
|
info.RequestConversionChain = append(info.RequestConversionChain, format)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (info *RelayInfo) GetFinalRequestRelayFormat() types.RelayFormat {
|
||||||
|
if info == nil {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if info.FinalRequestRelayFormat != "" {
|
||||||
|
return info.FinalRequestRelayFormat
|
||||||
|
}
|
||||||
|
if n := len(info.RequestConversionChain); n > 0 {
|
||||||
|
return info.RequestConversionChain[n-1]
|
||||||
|
}
|
||||||
|
return info.RelayFormat
|
||||||
|
}
|
||||||
|
|
||||||
func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo {
|
func GenRelayInfoResponsesCompaction(c *gin.Context, request *dto.OpenAIResponsesCompactionRequest) *RelayInfo {
|
||||||
info := genBaseRelayInfo(c, request)
|
info := genBaseRelayInfo(c, request)
|
||||||
if info.RelayMode == relayconstant.RelayModeUnknown {
|
if info.RelayMode == relayconstant.RelayModeUnknown {
|
||||||
|
|||||||
40
relay/common/relay_info_test.go
Normal file
40
relay/common/relay_info_test.go
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
package common
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/QuantumNous/new-api/types"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestRelayInfoGetFinalRequestRelayFormatPrefersExplicitFinal(t *testing.T) {
|
||||||
|
info := &RelayInfo{
|
||||||
|
RelayFormat: types.RelayFormatOpenAI,
|
||||||
|
RequestConversionChain: []types.RelayFormat{types.RelayFormatOpenAI, types.RelayFormatClaude},
|
||||||
|
FinalRequestRelayFormat: types.RelayFormatOpenAIResponses,
|
||||||
|
}
|
||||||
|
|
||||||
|
require.Equal(t, types.RelayFormat(types.RelayFormatOpenAIResponses), info.GetFinalRequestRelayFormat())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRelayInfoGetFinalRequestRelayFormatFallsBackToConversionChain(t *testing.T) {
|
||||||
|
info := &RelayInfo{
|
||||||
|
RelayFormat: types.RelayFormatOpenAI,
|
||||||
|
RequestConversionChain: []types.RelayFormat{types.RelayFormatOpenAI, types.RelayFormatClaude},
|
||||||
|
}
|
||||||
|
|
||||||
|
require.Equal(t, types.RelayFormat(types.RelayFormatClaude), info.GetFinalRequestRelayFormat())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRelayInfoGetFinalRequestRelayFormatFallsBackToRelayFormat(t *testing.T) {
|
||||||
|
info := &RelayInfo{
|
||||||
|
RelayFormat: types.RelayFormatGemini,
|
||||||
|
}
|
||||||
|
|
||||||
|
require.Equal(t, types.RelayFormat(types.RelayFormatGemini), info.GetFinalRequestRelayFormat())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRelayInfoGetFinalRequestRelayFormatNilReceiver(t *testing.T) {
|
||||||
|
var info *RelayInfo
|
||||||
|
require.Equal(t, types.RelayFormat(""), info.GetFinalRequestRelayFormat())
|
||||||
|
}
|
||||||
@@ -232,7 +232,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
|
|||||||
}
|
}
|
||||||
|
|
||||||
if originUsage != nil {
|
if originUsage != nil {
|
||||||
service.ObserveChannelAffinityUsageCacheFromContext(ctx, usage)
|
service.ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
|
||||||
}
|
}
|
||||||
|
|
||||||
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
|
adminRejectReason := common.GetContextKeyString(ctx, constant.ContextKeyAdminRejectReason)
|
||||||
@@ -336,7 +336,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage
|
|||||||
|
|
||||||
var audioInputQuota decimal.Decimal
|
var audioInputQuota decimal.Decimal
|
||||||
var audioInputPrice float64
|
var audioInputPrice float64
|
||||||
isClaudeUsageSemantic := relayInfo.FinalRequestRelayFormat == types.RelayFormatClaude
|
isClaudeUsageSemantic := relayInfo.GetFinalRequestRelayFormat() == types.RelayFormatClaude
|
||||||
if !relayInfo.PriceData.UsePrice {
|
if !relayInfo.PriceData.UsePrice {
|
||||||
baseTokens := dPromptTokens
|
baseTokens := dPromptTokens
|
||||||
// 减去 cached tokens
|
// 减去 cached tokens
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ import (
|
|||||||
"github.com/QuantumNous/new-api/dto"
|
"github.com/QuantumNous/new-api/dto"
|
||||||
"github.com/QuantumNous/new-api/pkg/cachex"
|
"github.com/QuantumNous/new-api/pkg/cachex"
|
||||||
"github.com/QuantumNous/new-api/setting/operation_setting"
|
"github.com/QuantumNous/new-api/setting/operation_setting"
|
||||||
|
"github.com/QuantumNous/new-api/types"
|
||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"github.com/samber/hot"
|
"github.com/samber/hot"
|
||||||
"github.com/tidwall/gjson"
|
"github.com/tidwall/gjson"
|
||||||
@@ -61,6 +62,12 @@ type ChannelAffinityStatsContext struct {
|
|||||||
TTLSeconds int64
|
TTLSeconds int64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cached-token rate modes. The value is stored on the usage cache counters and
// exposed to clients through the "cached_token_rate_mode" JSON field, so the
// string values are part of the API and must not change.
const (
	// cacheTokenRateModeCachedOverPrompt: rate is cached / prompt.
	cacheTokenRateModeCachedOverPrompt = "cached_over_prompt"
	// cacheTokenRateModeCachedOverPromptPlusCached: rate is cached / (prompt + cached).
	cacheTokenRateModeCachedOverPromptPlusCached = "cached_over_prompt_plus_cached"
	// cacheTokenRateModeMixed marks counters that have observed more than one
	// distinct mode, for which no single rate formula applies.
	cacheTokenRateModeMixed = "mixed"
)
|
||||||
|
|
||||||
type ChannelAffinityCacheStats struct {
|
type ChannelAffinityCacheStats struct {
|
||||||
Enabled bool `json:"enabled"`
|
Enabled bool `json:"enabled"`
|
||||||
Total int `json:"total"`
|
Total int `json:"total"`
|
||||||
@@ -565,9 +572,10 @@ func RecordChannelAffinity(c *gin.Context, channelID int) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type ChannelAffinityUsageCacheStats struct {
|
type ChannelAffinityUsageCacheStats struct {
|
||||||
RuleName string `json:"rule_name"`
|
RuleName string `json:"rule_name"`
|
||||||
UsingGroup string `json:"using_group"`
|
UsingGroup string `json:"using_group"`
|
||||||
KeyFingerprint string `json:"key_fp"`
|
KeyFingerprint string `json:"key_fp"`
|
||||||
|
CachedTokenRateMode string `json:"cached_token_rate_mode"`
|
||||||
|
|
||||||
Hit int64 `json:"hit"`
|
Hit int64 `json:"hit"`
|
||||||
Total int64 `json:"total"`
|
Total int64 `json:"total"`
|
||||||
@@ -582,6 +590,8 @@ type ChannelAffinityUsageCacheStats struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type ChannelAffinityUsageCacheCounters struct {
|
type ChannelAffinityUsageCacheCounters struct {
|
||||||
|
CachedTokenRateMode string `json:"cached_token_rate_mode"`
|
||||||
|
|
||||||
Hit int64 `json:"hit"`
|
Hit int64 `json:"hit"`
|
||||||
Total int64 `json:"total"`
|
Total int64 `json:"total"`
|
||||||
WindowSeconds int64 `json:"window_seconds"`
|
WindowSeconds int64 `json:"window_seconds"`
|
||||||
@@ -596,12 +606,17 @@ type ChannelAffinityUsageCacheCounters struct {
|
|||||||
|
|
||||||
var channelAffinityUsageCacheStatsLocks [64]sync.Mutex
|
var channelAffinityUsageCacheStatsLocks [64]sync.Mutex
|
||||||
|
|
||||||
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage) {
|
// ObserveChannelAffinityUsageCacheByRelayFormat records usage cache stats with a stable rate mode derived from relay format.
|
||||||
|
func ObserveChannelAffinityUsageCacheByRelayFormat(c *gin.Context, usage *dto.Usage, relayFormat types.RelayFormat) {
|
||||||
|
ObserveChannelAffinityUsageCacheFromContext(c, usage, cachedTokenRateModeByRelayFormat(relayFormat))
|
||||||
|
}
|
||||||
|
|
||||||
|
func ObserveChannelAffinityUsageCacheFromContext(c *gin.Context, usage *dto.Usage, cachedTokenRateMode string) {
|
||||||
statsCtx, ok := GetChannelAffinityStatsContext(c)
|
statsCtx, ok := GetChannelAffinityStatsContext(c)
|
||||||
if !ok {
|
if !ok {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
observeChannelAffinityUsageCache(statsCtx, usage)
|
observeChannelAffinityUsageCache(statsCtx, usage, cachedTokenRateMode)
|
||||||
}
|
}
|
||||||
|
|
||||||
func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats {
|
func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) ChannelAffinityUsageCacheStats {
|
||||||
@@ -628,6 +643,7 @@ func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) Chann
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ChannelAffinityUsageCacheStats{
|
return ChannelAffinityUsageCacheStats{
|
||||||
|
CachedTokenRateMode: v.CachedTokenRateMode,
|
||||||
RuleName: ruleName,
|
RuleName: ruleName,
|
||||||
UsingGroup: usingGroup,
|
UsingGroup: usingGroup,
|
||||||
KeyFingerprint: keyFp,
|
KeyFingerprint: keyFp,
|
||||||
@@ -643,7 +659,7 @@ func GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFp string) Chann
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage) {
|
func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usage *dto.Usage, cachedTokenRateMode string) {
|
||||||
entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint)
|
entryKey := channelAffinityUsageCacheEntryKey(statsCtx.RuleName, statsCtx.UsingGroup, statsCtx.KeyFingerprint)
|
||||||
if entryKey == "" {
|
if entryKey == "" {
|
||||||
return
|
return
|
||||||
@@ -669,6 +685,14 @@ func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usag
|
|||||||
if !found {
|
if !found {
|
||||||
next = ChannelAffinityUsageCacheCounters{}
|
next = ChannelAffinityUsageCacheCounters{}
|
||||||
}
|
}
|
||||||
|
currentMode := normalizeCachedTokenRateMode(cachedTokenRateMode)
|
||||||
|
if currentMode != "" {
|
||||||
|
if next.CachedTokenRateMode == "" {
|
||||||
|
next.CachedTokenRateMode = currentMode
|
||||||
|
} else if next.CachedTokenRateMode != currentMode && next.CachedTokenRateMode != cacheTokenRateModeMixed {
|
||||||
|
next.CachedTokenRateMode = cacheTokenRateModeMixed
|
||||||
|
}
|
||||||
|
}
|
||||||
next.Total++
|
next.Total++
|
||||||
hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage)
|
hit, cachedTokens, promptCacheHitTokens := usageCacheSignals(usage)
|
||||||
if hit {
|
if hit {
|
||||||
@@ -684,6 +708,30 @@ func observeChannelAffinityUsageCache(statsCtx ChannelAffinityStatsContext, usag
|
|||||||
_ = cache.SetWithTTL(entryKey, next, ttl)
|
_ = cache.SetWithTTL(entryKey, next, ttl)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func normalizeCachedTokenRateMode(mode string) string {
|
||||||
|
switch mode {
|
||||||
|
case cacheTokenRateModeCachedOverPrompt:
|
||||||
|
return cacheTokenRateModeCachedOverPrompt
|
||||||
|
case cacheTokenRateModeCachedOverPromptPlusCached:
|
||||||
|
return cacheTokenRateModeCachedOverPromptPlusCached
|
||||||
|
case cacheTokenRateModeMixed:
|
||||||
|
return cacheTokenRateModeMixed
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func cachedTokenRateModeByRelayFormat(relayFormat types.RelayFormat) string {
|
||||||
|
switch relayFormat {
|
||||||
|
case types.RelayFormatOpenAI, types.RelayFormatOpenAIResponses, types.RelayFormatOpenAIResponsesCompaction:
|
||||||
|
return cacheTokenRateModeCachedOverPrompt
|
||||||
|
case types.RelayFormatClaude:
|
||||||
|
return cacheTokenRateModeCachedOverPromptPlusCached
|
||||||
|
default:
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string {
|
func channelAffinityUsageCacheEntryKey(ruleName, usingGroup, keyFp string) string {
|
||||||
ruleName = strings.TrimSpace(ruleName)
|
ruleName = strings.TrimSpace(ruleName)
|
||||||
usingGroup = strings.TrimSpace(usingGroup)
|
usingGroup = strings.TrimSpace(usingGroup)
|
||||||
|
|||||||
105
service/channel_affinity_usage_cache_test.go
Normal file
105
service/channel_affinity_usage_cache_test.go
Normal file
@@ -0,0 +1,105 @@
|
|||||||
|
package service
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"net/http/httptest"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/QuantumNous/new-api/dto"
|
||||||
|
"github.com/QuantumNous/new-api/types"
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP string) *gin.Context {
|
||||||
|
rec := httptest.NewRecorder()
|
||||||
|
ctx, _ := gin.CreateTestContext(rec)
|
||||||
|
setChannelAffinityContext(ctx, channelAffinityMeta{
|
||||||
|
CacheKey: fmt.Sprintf("test:%s:%s:%s", ruleName, usingGroup, keyFP),
|
||||||
|
TTLSeconds: 600,
|
||||||
|
RuleName: ruleName,
|
||||||
|
UsingGroup: usingGroup,
|
||||||
|
KeyFingerprint: keyFP,
|
||||||
|
})
|
||||||
|
return ctx
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestObserveChannelAffinityUsageCacheByRelayFormat_ClaudeMode(t *testing.T) {
|
||||||
|
ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
|
||||||
|
usingGroup := "default"
|
||||||
|
keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
|
||||||
|
ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 100,
|
||||||
|
CompletionTokens: 40,
|
||||||
|
TotalTokens: 140,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 30,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, types.RelayFormatClaude)
|
||||||
|
stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
|
||||||
|
|
||||||
|
require.EqualValues(t, 1, stats.Total)
|
||||||
|
require.EqualValues(t, 1, stats.Hit)
|
||||||
|
require.EqualValues(t, 100, stats.PromptTokens)
|
||||||
|
require.EqualValues(t, 40, stats.CompletionTokens)
|
||||||
|
require.EqualValues(t, 140, stats.TotalTokens)
|
||||||
|
require.EqualValues(t, 30, stats.CachedTokens)
|
||||||
|
require.Equal(t, cacheTokenRateModeCachedOverPromptPlusCached, stats.CachedTokenRateMode)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestObserveChannelAffinityUsageCacheByRelayFormat_MixedMode(t *testing.T) {
|
||||||
|
ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
|
||||||
|
usingGroup := "default"
|
||||||
|
keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
|
||||||
|
ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
|
||||||
|
|
||||||
|
openAIUsage := &dto.Usage{
|
||||||
|
PromptTokens: 100,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 10,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
claudeUsage := &dto.Usage{
|
||||||
|
PromptTokens: 80,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 20,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, openAIUsage, types.RelayFormatOpenAI)
|
||||||
|
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, claudeUsage, types.RelayFormatClaude)
|
||||||
|
stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
|
||||||
|
|
||||||
|
require.EqualValues(t, 2, stats.Total)
|
||||||
|
require.EqualValues(t, 2, stats.Hit)
|
||||||
|
require.EqualValues(t, 180, stats.PromptTokens)
|
||||||
|
require.EqualValues(t, 30, stats.CachedTokens)
|
||||||
|
require.Equal(t, cacheTokenRateModeMixed, stats.CachedTokenRateMode)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestObserveChannelAffinityUsageCacheByRelayFormat_UnsupportedModeKeepsEmpty(t *testing.T) {
|
||||||
|
ruleName := fmt.Sprintf("rule_%d", time.Now().UnixNano())
|
||||||
|
usingGroup := "default"
|
||||||
|
keyFP := fmt.Sprintf("fp_%d", time.Now().UnixNano())
|
||||||
|
ctx := buildChannelAffinityStatsContextForTest(ruleName, usingGroup, keyFP)
|
||||||
|
|
||||||
|
usage := &dto.Usage{
|
||||||
|
PromptTokens: 100,
|
||||||
|
PromptTokensDetails: dto.InputTokenDetails{
|
||||||
|
CachedTokens: 25,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, types.RelayFormatGemini)
|
||||||
|
stats := GetChannelAffinityUsageCacheStats(ruleName, usingGroup, keyFP)
|
||||||
|
|
||||||
|
require.EqualValues(t, 1, stats.Total)
|
||||||
|
require.EqualValues(t, 1, stats.Hit)
|
||||||
|
require.EqualValues(t, 25, stats.CachedTokens)
|
||||||
|
require.Equal(t, "", stats.CachedTokenRateMode)
|
||||||
|
}
|
||||||
@@ -236,6 +236,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
|
|||||||
}
|
}
|
||||||
|
|
||||||
func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) {
|
func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage) {
|
||||||
|
if usage != nil {
|
||||||
|
ObserveChannelAffinityUsageCacheByRelayFormat(ctx, usage, relayInfo.GetFinalRequestRelayFormat())
|
||||||
|
}
|
||||||
|
|
||||||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
||||||
promptTokens := usage.PromptTokens
|
promptTokens := usage.PromptTokens
|
||||||
|
|||||||
@@ -39,6 +39,21 @@ function formatTokenRate(n, d) {
|
|||||||
return `${r.toFixed(2)}%`;
|
return `${r.toFixed(2)}%`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Formats the cached-token share according to the rate mode reported by the
 * backend.
 *
 * - 'cached_over_prompt_plus_cached': cached / (prompt + cached)
 * - 'cached_over_prompt':             cached / prompt
 * - any other mode (including 'mixed' or empty): '-' because no single
 *   formula applies.
 *
 * @param {number|string} cachedTokens  Cached token count.
 * @param {number|string} promptTokens  Prompt token count.
 * @param {string} mode                 Value of `cached_token_rate_mode`.
 * @returns {string} Result of formatTokenRate for the chosen denominator, or '-'.
 */
function formatCachedTokenRate(cachedTokens, promptTokens, mode) {
  switch (mode) {
    case 'cached_over_prompt_plus_cached': {
      // Missing/falsy counts are treated as 0 before summing.
      const denominator = Number(promptTokens || 0) + Number(cachedTokens || 0);
      return formatTokenRate(cachedTokens, denominator);
    }
    case 'cached_over_prompt':
      return formatTokenRate(cachedTokens, promptTokens);
    default:
      return '-';
  }
}
|
||||||
|
|
||||||
|
/**
 * Reports whether `value` is a string containing at least one
 * non-whitespace character.
 *
 * @param {*} value Any value; non-strings always yield false.
 * @returns {boolean}
 */
function hasTextValue(value) {
  return typeof value === 'string' && value.trim() !== '';
}
|
||||||
|
|
||||||
const ChannelAffinityUsageCacheModal = ({
|
const ChannelAffinityUsageCacheModal = ({
|
||||||
t,
|
t,
|
||||||
showChannelAffinityUsageCacheModal,
|
showChannelAffinityUsageCacheModal,
|
||||||
@@ -107,7 +122,7 @@ const ChannelAffinityUsageCacheModal = ({
|
|||||||
t,
|
t,
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const rows = useMemo(() => {
|
const { rows, supportsTokenStats } = useMemo(() => {
|
||||||
const s = stats || {};
|
const s = stats || {};
|
||||||
const hit = Number(s.hit || 0);
|
const hit = Number(s.hit || 0);
|
||||||
const total = Number(s.total || 0);
|
const total = Number(s.total || 0);
|
||||||
@@ -118,48 +133,62 @@ const ChannelAffinityUsageCacheModal = ({
|
|||||||
const totalTokens = Number(s.total_tokens || 0);
|
const totalTokens = Number(s.total_tokens || 0);
|
||||||
const cachedTokens = Number(s.cached_tokens || 0);
|
const cachedTokens = Number(s.cached_tokens || 0);
|
||||||
const promptCacheHitTokens = Number(s.prompt_cache_hit_tokens || 0);
|
const promptCacheHitTokens = Number(s.prompt_cache_hit_tokens || 0);
|
||||||
|
const cachedTokenRateMode = String(s.cached_token_rate_mode || '').trim();
|
||||||
|
const supportsTokenStats =
|
||||||
|
cachedTokenRateMode === 'cached_over_prompt' ||
|
||||||
|
cachedTokenRateMode === 'cached_over_prompt_plus_cached' ||
|
||||||
|
cachedTokenRateMode === 'mixed';
|
||||||
|
|
||||||
return [
|
const data = [];
|
||||||
{ key: t('规则'), value: s.rule_name || params.rule_name || '-' },
|
const ruleName = String(s.rule_name || params.rule_name || '').trim();
|
||||||
{ key: t('分组'), value: s.using_group || params.using_group || '-' },
|
const usingGroup = String(s.using_group || params.using_group || '').trim();
|
||||||
{
|
const keyHint = String(params.key_hint || '').trim();
|
||||||
key: t('Key 摘要'),
|
const keyFp = String(s.key_fp || params.key_fp || '').trim();
|
||||||
value: params.key_hint || '-',
|
|
||||||
},
|
if (hasTextValue(ruleName)) {
|
||||||
{
|
data.push({ key: t('规则'), value: ruleName });
|
||||||
key: t('Key 指纹'),
|
}
|
||||||
value: s.key_fp || params.key_fp || '-',
|
if (hasTextValue(usingGroup)) {
|
||||||
},
|
data.push({ key: t('分组'), value: usingGroup });
|
||||||
{ key: t('TTL(秒)'), value: windowSeconds > 0 ? windowSeconds : '-' },
|
}
|
||||||
{
|
if (hasTextValue(keyHint)) {
|
||||||
key: t('命中率'),
|
data.push({ key: t('Key 摘要'), value: keyHint });
|
||||||
value: `${hit}/${total} (${formatRate(hit, total)})`,
|
}
|
||||||
},
|
if (hasTextValue(keyFp)) {
|
||||||
{
|
data.push({ key: t('Key 指纹'), value: keyFp });
|
||||||
key: t('Prompt tokens'),
|
}
|
||||||
value: promptTokens,
|
if (windowSeconds > 0) {
|
||||||
},
|
data.push({ key: t('TTL(秒)'), value: windowSeconds });
|
||||||
{
|
}
|
||||||
key: t('Cached tokens'),
|
if (total > 0) {
|
||||||
value: `${cachedTokens} (${formatTokenRate(cachedTokens, promptTokens)})`,
|
data.push({ key: t('命中率'), value: `${hit}/${total} (${formatRate(hit, total)})` });
|
||||||
},
|
}
|
||||||
{
|
if (lastSeenAt > 0) {
|
||||||
key: t('Prompt cache hit tokens'),
|
data.push({ key: t('最近一次'), value: timestamp2string(lastSeenAt) });
|
||||||
value: promptCacheHitTokens,
|
}
|
||||||
},
|
|
||||||
{
|
if (supportsTokenStats) {
|
||||||
key: t('Completion tokens'),
|
if (promptTokens > 0) {
|
||||||
value: completionTokens,
|
data.push({ key: t('Prompt tokens'), value: promptTokens });
|
||||||
},
|
}
|
||||||
{
|
if (promptTokens > 0 || cachedTokens > 0) {
|
||||||
key: t('Total tokens'),
|
data.push({
|
||||||
value: totalTokens,
|
key: t('Cached tokens'),
|
||||||
},
|
value: `${cachedTokens} (${formatCachedTokenRate(cachedTokens, promptTokens, cachedTokenRateMode)})`,
|
||||||
{
|
});
|
||||||
key: t('最近一次'),
|
}
|
||||||
value: lastSeenAt > 0 ? timestamp2string(lastSeenAt) : '-',
|
if (promptCacheHitTokens > 0) {
|
||||||
},
|
data.push({ key: t('Prompt cache hit tokens'), value: promptCacheHitTokens });
|
||||||
];
|
}
|
||||||
|
if (completionTokens > 0) {
|
||||||
|
data.push({ key: t('Completion tokens'), value: completionTokens });
|
||||||
|
}
|
||||||
|
if (totalTokens > 0) {
|
||||||
|
data.push({ key: t('Total tokens'), value: totalTokens });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return { rows: data, supportsTokenStats };
|
||||||
}, [stats, params, t]);
|
}, [stats, params, t]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
@@ -179,15 +208,27 @@ const ChannelAffinityUsageCacheModal = ({
|
|||||||
{t(
|
{t(
|
||||||
'命中判定:usage 中存在 cached tokens(例如 cached_tokens/prompt_cache_hit_tokens)即视为命中。',
|
'命中判定:usage 中存在 cached tokens(例如 cached_tokens/prompt_cache_hit_tokens)即视为命中。',
|
||||||
)}
|
)}
|
||||||
|
{' '}
|
||||||
|
{t(
|
||||||
|
'Cached tokens 占比口径由后端返回:Claude 语义按 cached/(prompt+cached),其余按 cached/prompt。',
|
||||||
|
)}
|
||||||
|
{' '}
|
||||||
|
{t('当前仅 OpenAI / Claude 语义支持缓存 token 统计,其他通道将隐藏 token 相关字段。')}
|
||||||
|
{stats && !supportsTokenStats ? (
|
||||||
|
<>
|
||||||
|
{' '}
|
||||||
|
{t('该记录不包含可用的 token 统计口径。')}
|
||||||
|
</>
|
||||||
|
) : null}
|
||||||
</Text>
|
</Text>
|
||||||
</div>
|
</div>
|
||||||
<Spin spinning={loading} tip={t('加载中...')}>
|
<Spin spinning={loading} tip={t('加载中...')}>
|
||||||
{stats ? (
|
{stats && rows.length > 0 ? (
|
||||||
<Descriptions data={rows} />
|
<Descriptions data={rows} />
|
||||||
) : (
|
) : (
|
||||||
<div style={{ padding: '24px 0' }}>
|
<div style={{ padding: '24px 0' }}>
|
||||||
<Text type='tertiary' size='small'>
|
<Text type='tertiary' size='small'>
|
||||||
{loading ? t('加载中...') : t('暂无数据')}
|
{loading ? t('加载中...') : t('暂无可展示数据')}
|
||||||
</Text>
|
</Text>
|
||||||
</div>
|
</div>
|
||||||
)}
|
)}
|
||||||
|
|||||||
Reference in New Issue
Block a user