feat: Implement cache token ratio for more precise token pricing
This commit is contained in:
@@ -1,16 +1,20 @@
|
||||
package service
|
||||
|
||||
import (
|
||||
"github.com/gin-gonic/gin"
|
||||
"one-api/dto"
|
||||
relaycommon "one-api/relay/common"
|
||||
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio, modelPrice float64) map[string]interface{} {
|
||||
func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio float64,
|
||||
cacheTokens int, cacheRatio float64, modelPrice float64) map[string]interface{} {
|
||||
other := make(map[string]interface{})
|
||||
other["model_ratio"] = modelRatio
|
||||
other["group_ratio"] = groupRatio
|
||||
other["completion_ratio"] = completionRatio
|
||||
other["cache_tokens"] = cacheTokens
|
||||
other["cache_ratio"] = cacheRatio
|
||||
other["model_price"] = modelPrice
|
||||
other["frt"] = float64(relayInfo.FirstResponseTime.UnixMilli() - relayInfo.StartTime.UnixMilli())
|
||||
if relayInfo.ReasoningEffort != "" {
|
||||
@@ -27,7 +31,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
|
||||
}
|
||||
|
||||
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
|
||||
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice)
|
||||
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice)
|
||||
info["ws"] = true
|
||||
info["audio_input"] = usage.InputTokenDetails.AudioTokens
|
||||
info["audio_output"] = usage.OutputTokenDetails.AudioTokens
|
||||
@@ -39,7 +43,7 @@ func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us
|
||||
}
|
||||
|
||||
func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
|
||||
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice)
|
||||
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice)
|
||||
info["audio"] = true
|
||||
info["audio_input"] = usage.PromptTokensDetails.AudioTokens
|
||||
info["audio_output"] = usage.CompletionTokenDetails.AudioTokens
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
relaycommon "one-api/relay/common"
|
||||
"one-api/relay/helper"
|
||||
"one-api/setting"
|
||||
"one-api/setting/operation_setting"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
@@ -38,9 +39,9 @@ func calculateAudioQuota(info QuotaInfo) int {
|
||||
return int(info.ModelPrice * common.QuotaPerUnit * info.GroupRatio)
|
||||
}
|
||||
|
||||
completionRatio := setting.GetCompletionRatio(info.ModelName)
|
||||
audioRatio := setting.GetAudioRatio(info.ModelName)
|
||||
audioCompletionRatio := setting.GetAudioCompletionRatio(info.ModelName)
|
||||
completionRatio := operation_setting.GetCompletionRatio(info.ModelName)
|
||||
audioRatio := operation_setting.GetAudioRatio(info.ModelName)
|
||||
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName)
|
||||
ratio := info.GroupRatio * info.ModelRatio
|
||||
|
||||
quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio))
|
||||
@@ -75,7 +76,7 @@ func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usag
|
||||
audioInputTokens := usage.InputTokenDetails.AudioTokens
|
||||
audioOutTokens := usage.OutputTokenDetails.AudioTokens
|
||||
groupRatio := setting.GetGroupRatio(relayInfo.Group)
|
||||
modelRatio, _ := setting.GetModelRatio(modelName)
|
||||
modelRatio, _ := operation_setting.GetModelRatio(modelName)
|
||||
|
||||
quotaInfo := QuotaInfo{
|
||||
InputDetails: TokenDetails{
|
||||
@@ -122,9 +123,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
|
||||
audioOutTokens := usage.OutputTokenDetails.AudioTokens
|
||||
|
||||
tokenName := ctx.GetString("token_name")
|
||||
completionRatio := setting.GetCompletionRatio(modelName)
|
||||
audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName)
|
||||
audioCompletionRatio := setting.GetAudioCompletionRatio(modelName)
|
||||
completionRatio := operation_setting.GetCompletionRatio(modelName)
|
||||
audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName)
|
||||
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(modelName)
|
||||
|
||||
quotaInfo := QuotaInfo{
|
||||
InputDetails: TokenDetails{
|
||||
@@ -184,9 +185,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
||||
audioOutTokens := usage.CompletionTokenDetails.AudioTokens
|
||||
|
||||
tokenName := ctx.GetString("token_name")
|
||||
completionRatio := setting.GetCompletionRatio(relayInfo.OriginModelName)
|
||||
audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName)
|
||||
audioCompletionRatio := setting.GetAudioCompletionRatio(relayInfo.OriginModelName)
|
||||
completionRatio := operation_setting.GetCompletionRatio(relayInfo.OriginModelName)
|
||||
audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName)
|
||||
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(relayInfo.OriginModelName)
|
||||
|
||||
modelRatio := priceData.ModelRatio
|
||||
groupRatio := priceData.GroupRatio
|
||||
|
||||
@@ -10,7 +10,7 @@ import (
|
||||
"one-api/constant"
|
||||
"one-api/dto"
|
||||
relaycommon "one-api/relay/common"
|
||||
"one-api/setting"
|
||||
"one-api/setting/operation_setting"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
@@ -33,7 +33,7 @@ func InitTokenEncoders() {
|
||||
if err != nil {
|
||||
common.FatalLog(fmt.Sprintf("failed to get gpt-4o token encoder: %s", err.Error()))
|
||||
}
|
||||
for model, _ := range setting.GetDefaultModelRatioMap() {
|
||||
for model, _ := range operation_setting.GetDefaultModelRatioMap() {
|
||||
if strings.HasPrefix(model, "gpt-3.5") {
|
||||
tokenEncoderMap[model] = cl100TokenEncoder
|
||||
} else if strings.HasPrefix(model, "gpt-4") {
|
||||
|
||||
Reference in New Issue
Block a user