From bb848b2fe0d3fe1c6a2c4610dc0b568fdb3e2775 Mon Sep 17 00:00:00 2001 From: "1808837298@qq.com" <1808837298@qq.com> Date: Sat, 8 Mar 2025 16:44:08 +0800 Subject: [PATCH 1/3] refactor: Improve quota calculation precision using floating-point arithmetic --- relay/relay-text.go | 17 +++++++++-------- service/quota.go | 13 +++++++------ 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/relay/relay-text.go b/relay/relay-text.go index ddf6767d..af1eeca5 100644 --- a/relay/relay-text.go +++ b/relay/relay-text.go @@ -320,19 +320,20 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, groupRatio := priceData.GroupRatio modelPrice := priceData.ModelPrice - quota := 0 + quotaCalculate := 0.0 if !priceData.UsePrice { - quota = (promptTokens - cacheTokens) + int(math.Round(float64(cacheTokens)*cacheRatio)) - quota += int(math.Round(float64(completionTokens) * completionRatio)) - quota = int(math.Round(float64(quota) * ratio)) - if ratio != 0 && quota <= 0 { - quota = 1 + quotaCalculate = float64(promptTokens-cacheTokens) + float64(cacheTokens)*cacheRatio + quotaCalculate += float64(completionTokens) * completionRatio + quotaCalculate = quotaCalculate * ratio + if ratio != 0 && quotaCalculate <= 0 { + quotaCalculate = 1 } } else { - quota = int(modelPrice * common.QuotaPerUnit * groupRatio) + quotaCalculate = modelPrice * common.QuotaPerUnit * groupRatio } + quota := int(quotaCalculate) totalTokens := promptTokens + completionTokens - + var logContent string if !priceData.UsePrice { logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio) diff --git a/service/quota.go b/service/quota.go index e4499ff9..6fec7252 100644 --- a/service/quota.go +++ b/service/quota.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "github.com/bytedance/gopkg/util/gopool" - "math" "one-api/common" constant2 "one-api/constant" "one-api/dto" @@ -44,16 +43,18 @@ func calculateAudioQuota(info QuotaInfo) int { audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName) ratio := info.GroupRatio * info.ModelRatio - quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio)) - quota += int(math.Round(float64(info.InputDetails.AudioTokens)*audioRatio)) + - int(math.Round(float64(info.OutputDetails.AudioTokens)*audioRatio*audioCompletionRatio)) + quota := 0.0 + quota += float64(info.InputDetails.TextTokens) + quota += float64(info.OutputDetails.TextTokens) * completionRatio + quota += float64(info.InputDetails.AudioTokens) * audioRatio + quota += float64(info.OutputDetails.AudioTokens) * audioRatio * audioCompletionRatio - quota = int(math.Round(float64(quota) * ratio)) + quota = quota * ratio if ratio != 0 && quota <= 0 { quota = 1 } - return quota + return int(quota) } func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage) error { From a9bfcb0daf19515a9b012114a25cb5ea5f748b91 Mon Sep 17 00:00:00 2001 From: "1808837298@qq.com" <1808837298@qq.com> Date: Sat, 8 Mar 2025 16:50:24 +0800 Subject: [PATCH 2/3] feat: Add prompt cache hit tokens support for DeepSeek channel #406 --- dto/openai_response.go | 1 + relay/channel/openai/relay-openai.go | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/dto/openai_response.go b/dto/openai_response.go index a1d728fe..9188fad7 100644 --- a/dto/openai_response.go +++ b/dto/openai_response.go @@ -166,6 +166,7 @@ type Usage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` + PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"` PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"` CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"` } diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go index 223ddd3d..ffd36d3c 100644 --- a/relay/channel/openai/relay-openai.go +++ b/relay/channel/openai/relay-openai.go @@ -254,6 +254,12 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel if !containStreamUsage { usage, _ = service.ResponseText2Usage(responseTextBuilder.String(), info.UpstreamModelName, info.PromptTokens) usage.CompletionTokens += toolCount * 7 + } else { + if info.ChannelType == common.ChannelTypeDeepSeek { + if usage.PromptCacheHitTokens != 0 { + usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens + } + } } if info.ShouldIncludeUsage && !containStreamUsage { From 8c209e2fb929fe99f2bc70df63f03d325ed4cbaf Mon Sep 17 00:00:00 2001 From: "1808837298@qq.com" <1808837298@qq.com> Date: Sat, 8 Mar 2025 16:51:43 +0800 Subject: [PATCH 3/3] fix: Adjust DeepSeek cache ratio to 0.1 --- setting/operation_setting/cache_ratio.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setting/operation_setting/cache_ratio.go b/setting/operation_setting/cache_ratio.go index d7a3d973..545a5892 100644 --- a/setting/operation_setting/cache_ratio.go +++ b/setting/operation_setting/cache_ratio.go @@ -16,9 +16,9 @@ var defaultCacheRatio = map[string]float64{ "gpt-4o-mini-2024-07-18": 0.5, "gpt-4o-realtime-preview": 0.5, "gpt-4o-mini-realtime-preview": 0.5, - "deepseek-chat": 0.5, - "deepseek-reasoner": 0.5, - "deepseek-coder": 0.5, + "deepseek-chat": 0.1, + "deepseek-reasoner": 0.1, + "deepseek-coder": 0.1, } var defaultCreateCacheRatio = map[string]float64{}