Merge pull request #826 from Calcium-Ion/cache
feat: Add prompt cache hit tokens support for DeepSeek channel #406
This commit is contained in:
@@ -166,6 +166,7 @@ type Usage struct {
|
|||||||
PromptTokens int `json:"prompt_tokens"`
|
PromptTokens int `json:"prompt_tokens"`
|
||||||
CompletionTokens int `json:"completion_tokens"`
|
CompletionTokens int `json:"completion_tokens"`
|
||||||
TotalTokens int `json:"total_tokens"`
|
TotalTokens int `json:"total_tokens"`
|
||||||
|
PromptCacheHitTokens int `json:"prompt_cache_hit_tokens,omitempty"`
|
||||||
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
|
PromptTokensDetails InputTokenDetails `json:"prompt_tokens_details"`
|
||||||
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
|
CompletionTokenDetails OutputTokenDetails `json:"completion_tokens_details"`
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -254,6 +254,12 @@ func OaiStreamHandler(c *gin.Context, resp *http.Response, info *relaycommon.Rel
|
|||||||
if !containStreamUsage {
|
if !containStreamUsage {
|
||||||
usage, _ = service.ResponseText2Usage(responseTextBuilder.String(), info.UpstreamModelName, info.PromptTokens)
|
usage, _ = service.ResponseText2Usage(responseTextBuilder.String(), info.UpstreamModelName, info.PromptTokens)
|
||||||
usage.CompletionTokens += toolCount * 7
|
usage.CompletionTokens += toolCount * 7
|
||||||
|
} else {
|
||||||
|
if info.ChannelType == common.ChannelTypeDeepSeek {
|
||||||
|
if usage.PromptCacheHitTokens != 0 {
|
||||||
|
usage.PromptTokensDetails.CachedTokens = usage.PromptCacheHitTokens
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if info.ShouldIncludeUsage && !containStreamUsage {
|
if info.ShouldIncludeUsage && !containStreamUsage {
|
||||||
|
|||||||
@@ -320,19 +320,20 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
|||||||
groupRatio := priceData.GroupRatio
|
groupRatio := priceData.GroupRatio
|
||||||
modelPrice := priceData.ModelPrice
|
modelPrice := priceData.ModelPrice
|
||||||
|
|
||||||
quota := 0
|
quotaCalculate := 0.0
|
||||||
if !priceData.UsePrice {
|
if !priceData.UsePrice {
|
||||||
quota = (promptTokens - cacheTokens) + int(math.Round(float64(cacheTokens)*cacheRatio))
|
quotaCalculate = float64(promptTokens-cacheTokens) + float64(cacheTokens)*cacheRatio
|
||||||
quota += int(math.Round(float64(completionTokens) * completionRatio))
|
quotaCalculate += float64(completionTokens) * completionRatio
|
||||||
quota = int(math.Round(float64(quota) * ratio))
|
quotaCalculate = quotaCalculate * ratio
|
||||||
if ratio != 0 && quota <= 0 {
|
if ratio != 0 && quotaCalculate <= 0 {
|
||||||
quota = 1
|
quotaCalculate = 1
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
quota = int(modelPrice * common.QuotaPerUnit * groupRatio)
|
quotaCalculate = modelPrice * common.QuotaPerUnit * groupRatio
|
||||||
}
|
}
|
||||||
|
quota := int(quotaCalculate)
|
||||||
totalTokens := promptTokens + completionTokens
|
totalTokens := promptTokens + completionTokens
|
||||||
|
|
||||||
var logContent string
|
var logContent string
|
||||||
if !priceData.UsePrice {
|
if !priceData.UsePrice {
|
||||||
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio)
|
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio)
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"github.com/bytedance/gopkg/util/gopool"
|
"github.com/bytedance/gopkg/util/gopool"
|
||||||
"math"
|
|
||||||
"one-api/common"
|
"one-api/common"
|
||||||
constant2 "one-api/constant"
|
constant2 "one-api/constant"
|
||||||
"one-api/dto"
|
"one-api/dto"
|
||||||
@@ -44,16 +43,18 @@ func calculateAudioQuota(info QuotaInfo) int {
|
|||||||
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName)
|
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName)
|
||||||
ratio := info.GroupRatio * info.ModelRatio
|
ratio := info.GroupRatio * info.ModelRatio
|
||||||
|
|
||||||
quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio))
|
quota := 0.0
|
||||||
quota += int(math.Round(float64(info.InputDetails.AudioTokens)*audioRatio)) +
|
quota += float64(info.InputDetails.TextTokens)
|
||||||
int(math.Round(float64(info.OutputDetails.AudioTokens)*audioRatio*audioCompletionRatio))
|
quota += float64(info.OutputDetails.TextTokens) * completionRatio
|
||||||
|
quota += float64(info.InputDetails.AudioTokens) * audioRatio
|
||||||
|
quota += float64(info.OutputDetails.AudioTokens) * audioRatio * audioCompletionRatio
|
||||||
|
|
||||||
quota = int(math.Round(float64(quota) * ratio))
|
quota = quota * ratio
|
||||||
if ratio != 0 && quota <= 0 {
|
if ratio != 0 && quota <= 0 {
|
||||||
quota = 1
|
quota = 1
|
||||||
}
|
}
|
||||||
|
|
||||||
return quota
|
return int(quota)
|
||||||
}
|
}
|
||||||
|
|
||||||
func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage) error {
|
func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage) error {
|
||||||
|
|||||||
@@ -16,9 +16,9 @@ var defaultCacheRatio = map[string]float64{
|
|||||||
"gpt-4o-mini-2024-07-18": 0.5,
|
"gpt-4o-mini-2024-07-18": 0.5,
|
||||||
"gpt-4o-realtime-preview": 0.5,
|
"gpt-4o-realtime-preview": 0.5,
|
||||||
"gpt-4o-mini-realtime-preview": 0.5,
|
"gpt-4o-mini-realtime-preview": 0.5,
|
||||||
"deepseek-chat": 0.5,
|
"deepseek-chat": 0.1,
|
||||||
"deepseek-reasoner": 0.5,
|
"deepseek-reasoner": 0.1,
|
||||||
"deepseek-coder": 0.5,
|
"deepseek-coder": 0.1,
|
||||||
}
|
}
|
||||||
|
|
||||||
var defaultCreateCacheRatio = map[string]float64{}
|
var defaultCreateCacheRatio = map[string]float64{}
|
||||||
|
|||||||
Reference in New Issue
Block a user