diff --git a/dto/openai_response.go b/dto/openai_response.go index a1d728fe..9188fad7 100644 --- a/dto/openai_response.go +++ b/dto/openai_response.go @@ -166,6 +166,7 @@ type Usage struct { PromptTokens int `json:"prompt_tokens"` CompletionTokens int `json:"completion_tokens"` TotalTokens int `json:"total_tokens"` + PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"` PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"` CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"` } diff --git a/relay/channel/openai/relay-openai.go b/relay/channel/openai/relay-openai.go index 223ddd3d..ffd36d3c 100644 --- a/relay/channel/openai/relay-openai.go +++ b/relay/channel/openai/relay-openai.go @@ -254,6 +254,12 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel if !containStreamUsage { usage, _ = service.ResponseText2Usage(responseTextBuilder.String(), info.UpstreamModelName, info.PromptTokens) usage.CompletionTokens += toolCount * 7 + } else { + if info.ChannelType == common.ChannelTypeDeepSeek { + if usage.PromptCacheHitTokens != 0 { + usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens + } + } } if info.ShouldIncludeUsage && !containStreamUsage { diff --git a/relay/relay-text.go b/relay/relay-text.go index ddf6767d..af1eeca5 100644 --- a/relay/relay-text.go +++ b/relay/relay-text.go @@ -320,19 +320,20 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, groupRatio := priceData.GroupRatio modelPrice := priceData.ModelPrice - quota := 0 + quotaCalculate := 0.0 if !priceData.UsePrice { - quota = (promptTokens - cacheTokens) + int(math.Round(float64(cacheTokens)*cacheRatio)) - quota += int(math.Round(float64(completionTokens) * completionRatio)) - quota = int(math.Round(float64(quota) * ratio)) - if ratio != 0 && quota <= 0 { - quota = 1 + quotaCalculate = float64(promptTokens-cacheTokens) + float64(cacheTokens)*cacheRatio + quotaCalculate += float64(completionTokens) * completionRatio + quotaCalculate = quotaCalculate * ratio + if ratio != 0 && quotaCalculate <= 0 { + quotaCalculate = 1 } } else { - quota = int(modelPrice * common.QuotaPerUnit * groupRatio) + quotaCalculate = modelPrice * common.QuotaPerUnit * groupRatio } + quota := int(quotaCalculate) totalTokens := promptTokens + completionTokens - + var logContent string if !priceData.UsePrice { logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio) diff --git a/service/quota.go b/service/quota.go index e4499ff9..6fec7252 100644 --- a/service/quota.go +++ b/service/quota.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" "github.com/bytedance/gopkg/util/gopool" - "math" "one-api/common" constant2 "one-api/constant" "one-api/dto" @@ -44,16 +43,18 @@ func calculateAudioQuota(info QuotaInfo) int { audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName) ratio := info.GroupRatio * info.ModelRatio - quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio)) - quota += int(math.Round(float64(info.InputDetails.AudioTokens)*audioRatio)) + - int(math.Round(float64(info.OutputDetails.AudioTokens)*audioRatio*audioCompletionRatio)) + quota := 0.0 + quota += float64(info.InputDetails.TextTokens) + quota += float64(info.OutputDetails.TextTokens) * completionRatio + quota += float64(info.InputDetails.AudioTokens) * audioRatio + quota += float64(info.OutputDetails.AudioTokens) * audioRatio * audioCompletionRatio - quota = int(math.Round(float64(quota) * ratio)) + quota = quota * ratio if ratio != 0 && quota <= 0 { quota = 1 } - return quota + return int(quota) } func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage) error { diff --git a/setting/operation_setting/cache_ratio.go b/setting/operation_setting/cache_ratio.go index d7a3d973..545a5892 100644 --- a/setting/operation_setting/cache_ratio.go +++ b/setting/operation_setting/cache_ratio.go @@ -16,9 +16,9 @@ var defaultCacheRatio = map[string]float64{ "gpt-4o-mini-2024-07-18": 0.5, "gpt-4o-realtime-preview": 0.5, "gpt-4o-mini-realtime-preview": 0.5, - "deepseek-chat": 0.5, - "deepseek-reasoner": 0.5, - "deepseek-coder": 0.5, + "deepseek-chat": 0.1, + "deepseek-reasoner": 0.1, + "deepseek-coder": 0.1, } var defaultCreateCacheRatio = map[string]float64{}