feat: Implement cache token ratio for more precise token pricing
This commit is contained in:
@@ -158,7 +158,7 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
|
|||||||
tok := time.Now()
|
tok := time.Now()
|
||||||
milliseconds := tok.Sub(tik).Milliseconds()
|
milliseconds := tok.Sub(tik).Milliseconds()
|
||||||
consumedTime := float64(milliseconds) / 1000.0
|
consumedTime := float64(milliseconds) / 1000.0
|
||||||
other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio, priceData.ModelPrice)
|
other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio, 0, 0.0, priceData.ModelPrice)
|
||||||
model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, info.OriginModelName, "模型测试",
|
model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, info.OriginModelName, "模型测试",
|
||||||
quota, "模型测试", 0, quota, int(consumedTime), false, info.Group, other)
|
quota, "模型测试", 0, quota, int(consumedTime), false, info.Group, other)
|
||||||
common.SysLog(fmt.Sprintf("testing channel #%d, response: \n%s", channel.Id, string(respBody)))
|
common.SysLog(fmt.Sprintf("testing channel #%d, response: \n%s", channel.Id, string(respBody)))
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ import (
|
|||||||
"github.com/gin-gonic/gin"
|
"github.com/gin-gonic/gin"
|
||||||
"one-api/model"
|
"one-api/model"
|
||||||
"one-api/setting"
|
"one-api/setting"
|
||||||
|
"one-api/setting/operation_setting"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GetPricing(c *gin.Context) {
|
func GetPricing(c *gin.Context) {
|
||||||
@@ -39,7 +40,7 @@ func GetPricing(c *gin.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func ResetModelRatio(c *gin.Context) {
|
func ResetModelRatio(c *gin.Context) {
|
||||||
defaultStr := setting.DefaultModelRatio2JSONString()
|
defaultStr := operation_setting.DefaultModelRatio2JSONString()
|
||||||
err := model.UpdateOption("ModelRatio", defaultStr)
|
err := model.UpdateOption("ModelRatio", defaultStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.JSON(200, gin.H{
|
c.JSON(200, gin.H{
|
||||||
@@ -48,7 +49,7 @@ func ResetModelRatio(c *gin.Context) {
|
|||||||
})
|
})
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
err = setting.UpdateModelRatioByJSONString(defaultStr)
|
err = operation_setting.UpdateModelRatioByJSONString(defaultStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.JSON(200, gin.H{
|
c.JSON(200, gin.H{
|
||||||
"success": false,
|
"success": false,
|
||||||
|
|||||||
@@ -92,11 +92,12 @@ func InitOptionMap() {
|
|||||||
common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
|
common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
|
||||||
common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
|
common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
|
||||||
common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
|
common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
|
||||||
common.OptionMap["ModelRatio"] = setting.ModelRatio2JSONString()
|
common.OptionMap["ModelRatio"] = operation_setting.ModelRatio2JSONString()
|
||||||
common.OptionMap["ModelPrice"] = setting.ModelPrice2JSONString()
|
common.OptionMap["ModelPrice"] = operation_setting.ModelPrice2JSONString()
|
||||||
|
common.OptionMap["CacheRatio"] = operation_setting.CacheRatio2JSONString()
|
||||||
common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString()
|
common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString()
|
||||||
common.OptionMap["UserUsableGroups"] = setting.UserUsableGroups2JSONString()
|
common.OptionMap["UserUsableGroups"] = setting.UserUsableGroups2JSONString()
|
||||||
common.OptionMap["CompletionRatio"] = setting.CompletionRatio2JSONString()
|
common.OptionMap["CompletionRatio"] = operation_setting.CompletionRatio2JSONString()
|
||||||
common.OptionMap["TopUpLink"] = common.TopUpLink
|
common.OptionMap["TopUpLink"] = common.TopUpLink
|
||||||
common.OptionMap["ChatLink"] = common.ChatLink
|
common.OptionMap["ChatLink"] = common.ChatLink
|
||||||
common.OptionMap["ChatLink2"] = common.ChatLink2
|
common.OptionMap["ChatLink2"] = common.ChatLink2
|
||||||
@@ -344,15 +345,17 @@ func updateOptionMap(key string, value string) (err error) {
|
|||||||
case "DataExportDefaultTime":
|
case "DataExportDefaultTime":
|
||||||
common.DataExportDefaultTime = value
|
common.DataExportDefaultTime = value
|
||||||
case "ModelRatio":
|
case "ModelRatio":
|
||||||
err = setting.UpdateModelRatioByJSONString(value)
|
err = operation_setting.UpdateModelRatioByJSONString(value)
|
||||||
case "GroupRatio":
|
case "GroupRatio":
|
||||||
err = setting.UpdateGroupRatioByJSONString(value)
|
err = setting.UpdateGroupRatioByJSONString(value)
|
||||||
case "UserUsableGroups":
|
case "UserUsableGroups":
|
||||||
err = setting.UpdateUserUsableGroupsByJSONString(value)
|
err = setting.UpdateUserUsableGroupsByJSONString(value)
|
||||||
case "CompletionRatio":
|
case "CompletionRatio":
|
||||||
err = setting.UpdateCompletionRatioByJSONString(value)
|
err = operation_setting.UpdateCompletionRatioByJSONString(value)
|
||||||
case "ModelPrice":
|
case "ModelPrice":
|
||||||
err = setting.UpdateModelPriceByJSONString(value)
|
err = operation_setting.UpdateModelPriceByJSONString(value)
|
||||||
|
case "CacheRatio":
|
||||||
|
err = operation_setting.UpdateCacheRatioByJSONString(value)
|
||||||
case "TopUpLink":
|
case "TopUpLink":
|
||||||
common.TopUpLink = value
|
common.TopUpLink = value
|
||||||
case "ChatLink":
|
case "ChatLink":
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ package model
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"one-api/common"
|
"one-api/common"
|
||||||
"one-api/setting"
|
"one-api/setting/operation_setting"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@@ -65,14 +65,14 @@ func updatePricing() {
|
|||||||
ModelName: model,
|
ModelName: model,
|
||||||
EnableGroup: groups,
|
EnableGroup: groups,
|
||||||
}
|
}
|
||||||
modelPrice, findPrice := setting.GetModelPrice(model, false)
|
modelPrice, findPrice := operation_setting.GetModelPrice(model, false)
|
||||||
if findPrice {
|
if findPrice {
|
||||||
pricing.ModelPrice = modelPrice
|
pricing.ModelPrice = modelPrice
|
||||||
pricing.QuotaType = 1
|
pricing.QuotaType = 1
|
||||||
} else {
|
} else {
|
||||||
modelRatio, _ := setting.GetModelRatio(model)
|
modelRatio, _ := operation_setting.GetModelRatio(model)
|
||||||
pricing.ModelRatio = modelRatio
|
pricing.ModelRatio = modelRatio
|
||||||
pricing.CompletionRatio = setting.GetCompletionRatio(model)
|
pricing.CompletionRatio = operation_setting.GetCompletionRatio(model)
|
||||||
pricing.QuotaType = 0
|
pricing.QuotaType = 0
|
||||||
}
|
}
|
||||||
pricingMap = append(pricingMap, pricing)
|
pricingMap = append(pricingMap, pricing)
|
||||||
|
|||||||
@@ -6,30 +6,33 @@ import (
|
|||||||
"one-api/common"
|
"one-api/common"
|
||||||
relaycommon "one-api/relay/common"
|
relaycommon "one-api/relay/common"
|
||||||
"one-api/setting"
|
"one-api/setting"
|
||||||
|
"one-api/setting/operation_setting"
|
||||||
)
|
)
|
||||||
|
|
||||||
type PriceData struct {
|
type PriceData struct {
|
||||||
ModelPrice float64
|
ModelPrice float64
|
||||||
ModelRatio float64
|
ModelRatio float64
|
||||||
CompletionRatio float64
|
CompletionRatio float64
|
||||||
|
CacheRatio float64
|
||||||
GroupRatio float64
|
GroupRatio float64
|
||||||
UsePrice bool
|
UsePrice bool
|
||||||
ShouldPreConsumedQuota int
|
ShouldPreConsumedQuota int
|
||||||
}
|
}
|
||||||
|
|
||||||
func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) (PriceData, error) {
|
func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) (PriceData, error) {
|
||||||
modelPrice, usePrice := setting.GetModelPrice(info.OriginModelName, false)
|
modelPrice, usePrice := operation_setting.GetModelPrice(info.OriginModelName, false)
|
||||||
groupRatio := setting.GetGroupRatio(info.Group)
|
groupRatio := setting.GetGroupRatio(info.Group)
|
||||||
var preConsumedQuota int
|
var preConsumedQuota int
|
||||||
var modelRatio float64
|
var modelRatio float64
|
||||||
var completionRatio float64
|
var completionRatio float64
|
||||||
|
var cacheRatio float64
|
||||||
if !usePrice {
|
if !usePrice {
|
||||||
preConsumedTokens := common.PreConsumedQuota
|
preConsumedTokens := common.PreConsumedQuota
|
||||||
if maxTokens != 0 {
|
if maxTokens != 0 {
|
||||||
preConsumedTokens = promptTokens + maxTokens
|
preConsumedTokens = promptTokens + maxTokens
|
||||||
}
|
}
|
||||||
var success bool
|
var success bool
|
||||||
modelRatio, success = setting.GetModelRatio(info.OriginModelName)
|
modelRatio, success = operation_setting.GetModelRatio(info.OriginModelName)
|
||||||
if !success {
|
if !success {
|
||||||
if info.UserId == 1 {
|
if info.UserId == 1 {
|
||||||
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置,请设置或开始自用模式;Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName)
|
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置,请设置或开始自用模式;Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName)
|
||||||
@@ -37,7 +40,8 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
|
|||||||
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置;Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName)
|
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置;Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
completionRatio = setting.GetCompletionRatio(info.OriginModelName)
|
completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName)
|
||||||
|
cacheRatio, _ = operation_setting.GetCacheRatio(info.OriginModelName)
|
||||||
ratio := modelRatio * groupRatio
|
ratio := modelRatio * groupRatio
|
||||||
preConsumedQuota = int(float64(preConsumedTokens) * ratio)
|
preConsumedQuota = int(float64(preConsumedTokens) * ratio)
|
||||||
} else {
|
} else {
|
||||||
@@ -49,6 +53,7 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
|
|||||||
CompletionRatio: completionRatio,
|
CompletionRatio: completionRatio,
|
||||||
GroupRatio: groupRatio,
|
GroupRatio: groupRatio,
|
||||||
UsePrice: usePrice,
|
UsePrice: usePrice,
|
||||||
|
CacheRatio: cacheRatio,
|
||||||
ShouldPreConsumedQuota: preConsumedQuota,
|
ShouldPreConsumedQuota: preConsumedQuota,
|
||||||
}, nil
|
}, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ import (
|
|||||||
relayconstant "one-api/relay/constant"
|
relayconstant "one-api/relay/constant"
|
||||||
"one-api/service"
|
"one-api/service"
|
||||||
"one-api/setting"
|
"one-api/setting"
|
||||||
|
"one-api/setting/operation_setting"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -157,10 +158,10 @@ func RelaySwapFace(c *gin.Context) *dto.MidjourneyResponse {
|
|||||||
return service.MidjourneyErrorWrapper(constant.MjRequestError, "sour_base64_and_target_base64_is_required")
|
return service.MidjourneyErrorWrapper(constant.MjRequestError, "sour_base64_and_target_base64_is_required")
|
||||||
}
|
}
|
||||||
modelName := service.CoverActionToModelName(constant.MjActionSwapFace)
|
modelName := service.CoverActionToModelName(constant.MjActionSwapFace)
|
||||||
modelPrice, success := setting.GetModelPrice(modelName, true)
|
modelPrice, success := operation_setting.GetModelPrice(modelName, true)
|
||||||
// 如果没有配置价格,则使用默认价格
|
// 如果没有配置价格,则使用默认价格
|
||||||
if !success {
|
if !success {
|
||||||
defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName]
|
defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
|
||||||
if !ok {
|
if !ok {
|
||||||
modelPrice = 0.1
|
modelPrice = 0.1
|
||||||
} else {
|
} else {
|
||||||
@@ -463,10 +464,10 @@ func RelayMidjourneySubmit(c *gin.Context, relayMode int) *dto.MidjourneyRespons
|
|||||||
fullRequestURL := fmt.Sprintf("%s%s", baseURL, requestURL)
|
fullRequestURL := fmt.Sprintf("%s%s", baseURL, requestURL)
|
||||||
|
|
||||||
modelName := service.CoverActionToModelName(midjRequest.Action)
|
modelName := service.CoverActionToModelName(midjRequest.Action)
|
||||||
modelPrice, success := setting.GetModelPrice(modelName, true)
|
modelPrice, success := operation_setting.GetModelPrice(modelName, true)
|
||||||
// 如果没有配置价格,则使用默认价格
|
// 如果没有配置价格,则使用默认价格
|
||||||
if !success {
|
if !success {
|
||||||
defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName]
|
defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
|
||||||
if !ok {
|
if !ok {
|
||||||
modelPrice = 0.1
|
modelPrice = 0.1
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -304,24 +304,26 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
|||||||
CompletionTokens: 0,
|
CompletionTokens: 0,
|
||||||
TotalTokens: relayInfo.PromptTokens,
|
TotalTokens: relayInfo.PromptTokens,
|
||||||
}
|
}
|
||||||
extraContent += " ,(可能是请求出错)"
|
extraContent += "(可能是请求出错)"
|
||||||
}
|
}
|
||||||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
||||||
promptTokens := usage.PromptTokens
|
promptTokens := usage.PromptTokens
|
||||||
|
cacheTokens := usage.PromptTokensDetails.CachedTokens
|
||||||
completionTokens := usage.CompletionTokens
|
completionTokens := usage.CompletionTokens
|
||||||
modelName := relayInfo.OriginModelName
|
modelName := relayInfo.OriginModelName
|
||||||
|
|
||||||
tokenName := ctx.GetString("token_name")
|
tokenName := ctx.GetString("token_name")
|
||||||
completionRatio := setting.GetCompletionRatio(modelName)
|
completionRatio := priceData.CompletionRatio
|
||||||
|
cacheRatio := priceData.CacheRatio
|
||||||
ratio := priceData.ModelRatio * priceData.GroupRatio
|
ratio := priceData.ModelRatio * priceData.GroupRatio
|
||||||
modelRatio := priceData.ModelRatio
|
modelRatio := priceData.ModelRatio
|
||||||
groupRatio := priceData.GroupRatio
|
groupRatio := priceData.GroupRatio
|
||||||
modelPrice := priceData.ModelPrice
|
modelPrice := priceData.ModelPrice
|
||||||
usePrice := priceData.UsePrice
|
|
||||||
|
|
||||||
quota := 0
|
quota := 0
|
||||||
if !priceData.UsePrice {
|
if !priceData.UsePrice {
|
||||||
quota = promptTokens + int(math.Round(float64(completionTokens)*completionRatio))
|
quota = (promptTokens - cacheTokens) + int(math.Round(float64(cacheTokens)*cacheRatio))
|
||||||
|
quota += int(math.Round(float64(completionTokens) * completionRatio))
|
||||||
quota = int(math.Round(float64(quota) * ratio))
|
quota = int(math.Round(float64(quota) * ratio))
|
||||||
if ratio != 0 && quota <= 0 {
|
if ratio != 0 && quota <= 0 {
|
||||||
quota = 1
|
quota = 1
|
||||||
@@ -330,8 +332,9 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
|||||||
quota = int(modelPrice * common.QuotaPerUnit * groupRatio)
|
quota = int(modelPrice * common.QuotaPerUnit * groupRatio)
|
||||||
}
|
}
|
||||||
totalTokens := promptTokens + completionTokens
|
totalTokens := promptTokens + completionTokens
|
||||||
|
|
||||||
var logContent string
|
var logContent string
|
||||||
if !usePrice {
|
if !priceData.UsePrice {
|
||||||
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio)
|
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio)
|
||||||
} else {
|
} else {
|
||||||
logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio)
|
logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio)
|
||||||
@@ -372,7 +375,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
|||||||
if extraContent != "" {
|
if extraContent != "" {
|
||||||
logContent += ", " + extraContent
|
logContent += ", " + extraContent
|
||||||
}
|
}
|
||||||
other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice)
|
other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice)
|
||||||
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel,
|
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel,
|
||||||
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
|
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
|
||||||
|
|
||||||
|
|||||||
@@ -16,6 +16,7 @@ import (
|
|||||||
relayconstant "one-api/relay/constant"
|
relayconstant "one-api/relay/constant"
|
||||||
"one-api/service"
|
"one-api/service"
|
||||||
"one-api/setting"
|
"one-api/setting"
|
||||||
|
"one-api/setting/operation_setting"
|
||||||
)
|
)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -37,9 +38,9 @@ func RelayTaskSubmit(c *gin.Context, relayMode int) (taskErr *dto.TaskError) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
modelName := service.CoverTaskActionToModelName(platform, relayInfo.Action)
|
modelName := service.CoverTaskActionToModelName(platform, relayInfo.Action)
|
||||||
modelPrice, success := setting.GetModelPrice(modelName, true)
|
modelPrice, success := operation_setting.GetModelPrice(modelName, true)
|
||||||
if !success {
|
if !success {
|
||||||
defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName]
|
defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
|
||||||
if !ok {
|
if !ok {
|
||||||
modelPrice = 0.1
|
modelPrice = 0.1
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import (
|
|||||||
relaycommon "one-api/relay/common"
|
relaycommon "one-api/relay/common"
|
||||||
"one-api/service"
|
"one-api/service"
|
||||||
"one-api/setting"
|
"one-api/setting"
|
||||||
|
"one-api/setting/operation_setting"
|
||||||
)
|
)
|
||||||
|
|
||||||
func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWithStatusCode) {
|
func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWithStatusCode) {
|
||||||
@@ -39,7 +40,7 @@ func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
//relayInfo.UpstreamModelName = textRequest.Model
|
//relayInfo.UpstreamModelName = textRequest.Model
|
||||||
modelPrice, getModelPriceSuccess := setting.GetModelPrice(relayInfo.UpstreamModelName, false)
|
modelPrice, getModelPriceSuccess := operation_setting.GetModelPrice(relayInfo.UpstreamModelName, false)
|
||||||
groupRatio := setting.GetGroupRatio(relayInfo.Group)
|
groupRatio := setting.GetGroupRatio(relayInfo.Group)
|
||||||
|
|
||||||
var preConsumedQuota int
|
var preConsumedQuota int
|
||||||
@@ -65,7 +66,7 @@ func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWi
|
|||||||
//if realtimeEvent.Session.MaxResponseOutputTokens != 0 {
|
//if realtimeEvent.Session.MaxResponseOutputTokens != 0 {
|
||||||
// preConsumedTokens = promptTokens + int(realtimeEvent.Session.MaxResponseOutputTokens)
|
// preConsumedTokens = promptTokens + int(realtimeEvent.Session.MaxResponseOutputTokens)
|
||||||
//}
|
//}
|
||||||
modelRatio, _ = setting.GetModelRatio(relayInfo.UpstreamModelName)
|
modelRatio, _ = operation_setting.GetModelRatio(relayInfo.UpstreamModelName)
|
||||||
ratio = modelRatio * groupRatio
|
ratio = modelRatio * groupRatio
|
||||||
preConsumedQuota = int(float64(preConsumedTokens) * ratio)
|
preConsumedQuota = int(float64(preConsumedTokens) * ratio)
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
@@ -1,16 +1,20 @@
|
|||||||
package service
|
package service
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"github.com/gin-gonic/gin"
|
|
||||||
"one-api/dto"
|
"one-api/dto"
|
||||||
relaycommon "one-api/relay/common"
|
relaycommon "one-api/relay/common"
|
||||||
|
|
||||||
|
"github.com/gin-gonic/gin"
|
||||||
)
|
)
|
||||||
|
|
||||||
func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio, modelPrice float64) map[string]interface{} {
|
func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio float64,
|
||||||
|
cacheTokens int, cacheRatio float64, modelPrice float64) map[string]interface{} {
|
||||||
other := make(map[string]interface{})
|
other := make(map[string]interface{})
|
||||||
other["model_ratio"] = modelRatio
|
other["model_ratio"] = modelRatio
|
||||||
other["group_ratio"] = groupRatio
|
other["group_ratio"] = groupRatio
|
||||||
other["completion_ratio"] = completionRatio
|
other["completion_ratio"] = completionRatio
|
||||||
|
other["cache_tokens"] = cacheTokens
|
||||||
|
other["cache_ratio"] = cacheRatio
|
||||||
other["model_price"] = modelPrice
|
other["model_price"] = modelPrice
|
||||||
other["frt"] = float64(relayInfo.FirstResponseTime.UnixMilli() - relayInfo.StartTime.UnixMilli())
|
other["frt"] = float64(relayInfo.FirstResponseTime.UnixMilli() - relayInfo.StartTime.UnixMilli())
|
||||||
if relayInfo.ReasoningEffort != "" {
|
if relayInfo.ReasoningEffort != "" {
|
||||||
@@ -27,7 +31,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
|
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
|
||||||
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice)
|
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice)
|
||||||
info["ws"] = true
|
info["ws"] = true
|
||||||
info["audio_input"] = usage.InputTokenDetails.AudioTokens
|
info["audio_input"] = usage.InputTokenDetails.AudioTokens
|
||||||
info["audio_output"] = usage.OutputTokenDetails.AudioTokens
|
info["audio_output"] = usage.OutputTokenDetails.AudioTokens
|
||||||
@@ -39,7 +43,7 @@ func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us
|
|||||||
}
|
}
|
||||||
|
|
||||||
func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
|
func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
|
||||||
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice)
|
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice)
|
||||||
info["audio"] = true
|
info["audio"] = true
|
||||||
info["audio_input"] = usage.PromptTokensDetails.AudioTokens
|
info["audio_input"] = usage.PromptTokensDetails.AudioTokens
|
||||||
info["audio_output"] = usage.CompletionTokenDetails.AudioTokens
|
info["audio_output"] = usage.CompletionTokenDetails.AudioTokens
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ import (
|
|||||||
relaycommon "one-api/relay/common"
|
relaycommon "one-api/relay/common"
|
||||||
"one-api/relay/helper"
|
"one-api/relay/helper"
|
||||||
"one-api/setting"
|
"one-api/setting"
|
||||||
|
"one-api/setting/operation_setting"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@@ -38,9 +39,9 @@ func calculateAudioQuota(info QuotaInfo) int {
|
|||||||
return int(info.ModelPrice * common.QuotaPerUnit * info.GroupRatio)
|
return int(info.ModelPrice * common.QuotaPerUnit * info.GroupRatio)
|
||||||
}
|
}
|
||||||
|
|
||||||
completionRatio := setting.GetCompletionRatio(info.ModelName)
|
completionRatio := operation_setting.GetCompletionRatio(info.ModelName)
|
||||||
audioRatio := setting.GetAudioRatio(info.ModelName)
|
audioRatio := operation_setting.GetAudioRatio(info.ModelName)
|
||||||
audioCompletionRatio := setting.GetAudioCompletionRatio(info.ModelName)
|
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName)
|
||||||
ratio := info.GroupRatio * info.ModelRatio
|
ratio := info.GroupRatio * info.ModelRatio
|
||||||
|
|
||||||
quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio))
|
quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio))
|
||||||
@@ -75,7 +76,7 @@ func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usag
|
|||||||
audioInputTokens := usage.InputTokenDetails.AudioTokens
|
audioInputTokens := usage.InputTokenDetails.AudioTokens
|
||||||
audioOutTokens := usage.OutputTokenDetails.AudioTokens
|
audioOutTokens := usage.OutputTokenDetails.AudioTokens
|
||||||
groupRatio := setting.GetGroupRatio(relayInfo.Group)
|
groupRatio := setting.GetGroupRatio(relayInfo.Group)
|
||||||
modelRatio, _ := setting.GetModelRatio(modelName)
|
modelRatio, _ := operation_setting.GetModelRatio(modelName)
|
||||||
|
|
||||||
quotaInfo := QuotaInfo{
|
quotaInfo := QuotaInfo{
|
||||||
InputDetails: TokenDetails{
|
InputDetails: TokenDetails{
|
||||||
@@ -122,9 +123,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
|
|||||||
audioOutTokens := usage.OutputTokenDetails.AudioTokens
|
audioOutTokens := usage.OutputTokenDetails.AudioTokens
|
||||||
|
|
||||||
tokenName := ctx.GetString("token_name")
|
tokenName := ctx.GetString("token_name")
|
||||||
completionRatio := setting.GetCompletionRatio(modelName)
|
completionRatio := operation_setting.GetCompletionRatio(modelName)
|
||||||
audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName)
|
audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName)
|
||||||
audioCompletionRatio := setting.GetAudioCompletionRatio(modelName)
|
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(modelName)
|
||||||
|
|
||||||
quotaInfo := QuotaInfo{
|
quotaInfo := QuotaInfo{
|
||||||
InputDetails: TokenDetails{
|
InputDetails: TokenDetails{
|
||||||
@@ -184,9 +185,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
|||||||
audioOutTokens := usage.CompletionTokenDetails.AudioTokens
|
audioOutTokens := usage.CompletionTokenDetails.AudioTokens
|
||||||
|
|
||||||
tokenName := ctx.GetString("token_name")
|
tokenName := ctx.GetString("token_name")
|
||||||
completionRatio := setting.GetCompletionRatio(relayInfo.OriginModelName)
|
completionRatio := operation_setting.GetCompletionRatio(relayInfo.OriginModelName)
|
||||||
audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName)
|
audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName)
|
||||||
audioCompletionRatio := setting.GetAudioCompletionRatio(relayInfo.OriginModelName)
|
audioCompletionRatio := operation_setting.GetAudioCompletionRatio(relayInfo.OriginModelName)
|
||||||
|
|
||||||
modelRatio := priceData.ModelRatio
|
modelRatio := priceData.ModelRatio
|
||||||
groupRatio := priceData.GroupRatio
|
groupRatio := priceData.GroupRatio
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ import (
|
|||||||
"one-api/constant"
|
"one-api/constant"
|
||||||
"one-api/dto"
|
"one-api/dto"
|
||||||
relaycommon "one-api/relay/common"
|
relaycommon "one-api/relay/common"
|
||||||
"one-api/setting"
|
"one-api/setting/operation_setting"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode/utf8"
|
"unicode/utf8"
|
||||||
|
|
||||||
@@ -33,7 +33,7 @@ func InitTokenEncoders() {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
common.FatalLog(fmt.Sprintf("failed to get gpt-4o token encoder: %s", err.Error()))
|
common.FatalLog(fmt.Sprintf("failed to get gpt-4o token encoder: %s", err.Error()))
|
||||||
}
|
}
|
||||||
for model, _ := range setting.GetDefaultModelRatioMap() {
|
for model, _ := range operation_setting.GetDefaultModelRatioMap() {
|
||||||
if strings.HasPrefix(model, "gpt-3.5") {
|
if strings.HasPrefix(model, "gpt-3.5") {
|
||||||
tokenEncoderMap[model] = cl100TokenEncoder
|
tokenEncoderMap[model] = cl100TokenEncoder
|
||||||
} else if strings.HasPrefix(model, "gpt-4") {
|
} else if strings.HasPrefix(model, "gpt-4") {
|
||||||
|
|||||||
77
setting/operation_setting/cache_ratio.go
Normal file
77
setting/operation_setting/cache_ratio.go
Normal file
@@ -0,0 +1,77 @@
|
|||||||
|
package operation_setting
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"one-api/common"
|
||||||
|
"sync"
|
||||||
|
)
|
||||||
|
|
||||||
|
// defaultCacheRatio holds the built-in cache ratio for each known model name.
// GetCacheRatioMap installs it as the active table when nothing has been
// loaded yet. Every built-in entry currently uses the same value, 0.5.
var defaultCacheRatio = map[string]float64{
	// OpenAI GPT-4 family.
	"gpt-4":                        0.5,
	"gpt-4o-2024-11-20":            0.5,
	"gpt-4o-2024-08-06":            0.5,
	"gpt-4o-mini-2024-07-18":       0.5,
	"gpt-4o-realtime-preview":      0.5,
	"gpt-4o-mini-realtime-preview": 0.5,

	// OpenAI o1 family.
	"o1-2024-12-17":         0.5,
	"o1-preview-2024-09-12": 0.5,
	"o1-mini-2024-09-12":    0.5,

	// DeepSeek family.
	"deepseek-chat":     0.5,
	"deepseek-reasoner": 0.5,
	"deepseek-coder":    0.5,
}
|
||||||
|
|
||||||
|
var (
	// cacheRatioMap is the active model-name -> cache-ratio table. It stays
	// nil until it is either loaded from JSON or lazily pointed at
	// defaultCacheRatio by GetCacheRatioMap.
	cacheRatioMap map[string]float64

	// cacheRatioMapMutex guards cacheRatioMap.
	cacheRatioMapMutex sync.RWMutex
)
|
||||||
|
|
||||||
|
// GetCacheRatioMap returns the cache ratio map
|
||||||
|
func GetCacheRatioMap() map[string]float64 {
|
||||||
|
cacheRatioMapMutex.Lock()
|
||||||
|
defer cacheRatioMapMutex.Unlock()
|
||||||
|
if cacheRatioMap == nil {
|
||||||
|
cacheRatioMap = defaultCacheRatio
|
||||||
|
}
|
||||||
|
return cacheRatioMap
|
||||||
|
}
|
||||||
|
|
||||||
|
// CacheRatio2JSONString converts the cache ratio map to a JSON string
|
||||||
|
func CacheRatio2JSONString() string {
|
||||||
|
GetCacheRatioMap()
|
||||||
|
jsonBytes, err := json.Marshal(cacheRatioMap)
|
||||||
|
if err != nil {
|
||||||
|
common.SysError("error marshalling cache ratio: " + err.Error())
|
||||||
|
}
|
||||||
|
return string(jsonBytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateCacheRatioByJSONString updates the cache ratio map from a JSON string
|
||||||
|
func UpdateCacheRatioByJSONString(jsonStr string) error {
|
||||||
|
cacheRatioMapMutex.Lock()
|
||||||
|
defer cacheRatioMapMutex.Unlock()
|
||||||
|
cacheRatioMap = make(map[string]float64)
|
||||||
|
return json.Unmarshal([]byte(jsonStr), &cacheRatioMap)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetCacheRatio returns the cache ratio for a model
|
||||||
|
func GetCacheRatio(name string) (float64, bool) {
|
||||||
|
GetCacheRatioMap()
|
||||||
|
ratio, ok := cacheRatioMap[name]
|
||||||
|
if !ok {
|
||||||
|
return 0.5, false // Default to 0.5 if not found
|
||||||
|
}
|
||||||
|
return ratio, true
|
||||||
|
}
|
||||||
|
|
||||||
|
// DefaultCacheRatio2JSONString converts the default cache ratio map to a JSON string
|
||||||
|
func DefaultCacheRatio2JSONString() string {
|
||||||
|
jsonBytes, err := json.Marshal(defaultCacheRatio)
|
||||||
|
if err != nil {
|
||||||
|
common.SysError("error marshalling default cache ratio: " + err.Error())
|
||||||
|
}
|
||||||
|
return string(jsonBytes)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetDefaultCacheRatioMap returns the default cache ratio map
|
||||||
|
func GetDefaultCacheRatioMap() map[string]float64 {
|
||||||
|
return defaultCacheRatio
|
||||||
|
}
|
||||||
@@ -1,9 +1,8 @@
|
|||||||
package setting
|
package operation_setting
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"one-api/common"
|
"one-api/common"
|
||||||
"one-api/setting/operation_setting"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
)
|
)
|
||||||
@@ -326,7 +325,7 @@ func GetModelRatio(name string) (float64, bool) {
|
|||||||
}
|
}
|
||||||
ratio, ok := modelRatioMap[name]
|
ratio, ok := modelRatioMap[name]
|
||||||
if !ok {
|
if !ok {
|
||||||
return 37.5, operation_setting.SelfUseModeEnabled
|
return 37.5, SelfUseModeEnabled
|
||||||
}
|
}
|
||||||
return ratio, true
|
return ratio, true
|
||||||
}
|
}
|
||||||
@@ -464,6 +464,8 @@ const LogsTable = () => {
|
|||||||
other.model_ratio,
|
other.model_ratio,
|
||||||
other.model_price,
|
other.model_price,
|
||||||
other.group_ratio,
|
other.group_ratio,
|
||||||
|
other.cache_tokens || 0,
|
||||||
|
other.cache_ratio || 1.0,
|
||||||
);
|
);
|
||||||
return (
|
return (
|
||||||
<Paragraph
|
<Paragraph
|
||||||
@@ -665,6 +667,8 @@ const LogsTable = () => {
|
|||||||
other?.audio_ratio,
|
other?.audio_ratio,
|
||||||
other?.audio_completion_ratio,
|
other?.audio_completion_ratio,
|
||||||
other.group_ratio,
|
other.group_ratio,
|
||||||
|
other.cache_tokens || 0,
|
||||||
|
other.cache_ratio || 1.0,
|
||||||
);
|
);
|
||||||
} else {
|
} else {
|
||||||
content = renderModelPrice(
|
content = renderModelPrice(
|
||||||
@@ -674,6 +678,8 @@ const LogsTable = () => {
|
|||||||
other.model_price,
|
other.model_price,
|
||||||
other.completion_ratio,
|
other.completion_ratio,
|
||||||
other.group_ratio,
|
other.group_ratio,
|
||||||
|
other.cache_tokens || 0,
|
||||||
|
other.cache_ratio || 1.0,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
expandDataLocal.push({
|
expandDataLocal.push({
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ const OperationSetting = () => {
|
|||||||
PreConsumedQuota: 0,
|
PreConsumedQuota: 0,
|
||||||
StreamCacheQueueLength: 0,
|
StreamCacheQueueLength: 0,
|
||||||
ModelRatio: '',
|
ModelRatio: '',
|
||||||
|
CacheRatio: '',
|
||||||
CompletionRatio: '',
|
CompletionRatio: '',
|
||||||
ModelPrice: '',
|
ModelPrice: '',
|
||||||
GroupRatio: '',
|
GroupRatio: '',
|
||||||
@@ -77,7 +78,8 @@ const OperationSetting = () => {
|
|||||||
item.key === 'GroupRatio' ||
|
item.key === 'GroupRatio' ||
|
||||||
item.key === 'UserUsableGroups' ||
|
item.key === 'UserUsableGroups' ||
|
||||||
item.key === 'CompletionRatio' ||
|
item.key === 'CompletionRatio' ||
|
||||||
item.key === 'ModelPrice'
|
item.key === 'ModelPrice' ||
|
||||||
|
item.key === 'CacheRatio'
|
||||||
) {
|
) {
|
||||||
item.value = JSON.stringify(JSON.parse(item.value), null, 2);
|
item.value = JSON.stringify(JSON.parse(item.value), null, 2);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -298,6 +298,8 @@ export function renderModelPrice(
|
|||||||
modelPrice = -1,
|
modelPrice = -1,
|
||||||
completionRatio,
|
completionRatio,
|
||||||
groupRatio,
|
groupRatio,
|
||||||
|
cacheTokens = 0,
|
||||||
|
cacheRatio = 1.0,
|
||||||
) {
|
) {
|
||||||
if (modelPrice !== -1) {
|
if (modelPrice !== -1) {
|
||||||
return i18next.t('模型价格:${{price}} * 分组倍率:{{ratio}} = ${{total}}', {
|
return i18next.t('模型价格:${{price}} * 分组倍率:{{ratio}} = ${{total}}', {
|
||||||
@@ -311,9 +313,15 @@ export function renderModelPrice(
|
|||||||
}
|
}
|
||||||
let inputRatioPrice = modelRatio * 2.0;
|
let inputRatioPrice = modelRatio * 2.0;
|
||||||
let completionRatioPrice = modelRatio * 2.0 * completionRatio;
|
let completionRatioPrice = modelRatio * 2.0 * completionRatio;
|
||||||
|
let cacheRatioPrice = modelRatio * 2.0 * cacheRatio;
|
||||||
|
|
||||||
|
// Calculate effective input tokens (non-cached + cached with ratio applied)
|
||||||
|
const effectiveInputTokens = (inputTokens - cacheTokens) + (cacheTokens * cacheRatio);
|
||||||
|
|
||||||
let price =
|
let price =
|
||||||
(inputTokens / 1000000) * inputRatioPrice * groupRatio +
|
(effectiveInputTokens / 1000000) * inputRatioPrice * groupRatio +
|
||||||
(completionTokens / 1000000) * completionRatioPrice * groupRatio;
|
(completionTokens / 1000000) * completionRatioPrice * groupRatio;
|
||||||
|
|
||||||
return (
|
return (
|
||||||
<>
|
<>
|
||||||
<article>
|
<article>
|
||||||
@@ -327,16 +335,36 @@ export function renderModelPrice(
|
|||||||
ratio: groupRatio,
|
ratio: groupRatio,
|
||||||
total: completionRatioPrice * groupRatio
|
total: completionRatioPrice * groupRatio
|
||||||
})}</p>
|
})}</p>
|
||||||
|
{cacheTokens > 0 && (
|
||||||
|
<p>{i18next.t('缓存:${{price}} * {{ratio}} = ${{total}} / 1M tokens (缓存比例: {{cacheRatio}})', {
|
||||||
|
price: cacheRatioPrice,
|
||||||
|
ratio: groupRatio,
|
||||||
|
total: cacheRatioPrice * groupRatio,
|
||||||
|
cacheRatio: cacheRatio
|
||||||
|
})}</p>
|
||||||
|
)}
|
||||||
<p></p>
|
<p></p>
|
||||||
<p>
|
<p>
|
||||||
{i18next.t('提示 {{input}} tokens / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', {
|
{cacheTokens > 0 ?
|
||||||
input: inputTokens,
|
i18next.t('提示 {{nonCacheInput}} tokens + 缓存 {{cacheInput}} tokens * {{cacheRatio}} / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', {
|
||||||
price: inputRatioPrice,
|
nonCacheInput: inputTokens - cacheTokens,
|
||||||
completion: completionTokens,
|
cacheInput: cacheTokens,
|
||||||
compPrice: completionRatioPrice,
|
cacheRatio: cacheRatio,
|
||||||
ratio: groupRatio,
|
price: inputRatioPrice,
|
||||||
total: price.toFixed(6)
|
completion: completionTokens,
|
||||||
})}
|
compPrice: completionRatioPrice,
|
||||||
|
ratio: groupRatio,
|
||||||
|
total: price.toFixed(6)
|
||||||
|
}) :
|
||||||
|
i18next.t('提示 {{input}} tokens / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', {
|
||||||
|
input: inputTokens,
|
||||||
|
price: inputRatioPrice,
|
||||||
|
completion: completionTokens,
|
||||||
|
compPrice: completionRatioPrice,
|
||||||
|
ratio: groupRatio,
|
||||||
|
total: price.toFixed(6)
|
||||||
|
})
|
||||||
|
}
|
||||||
</p>
|
</p>
|
||||||
<p>{i18next.t('仅供参考,以实际扣费为准')}</p>
|
<p>{i18next.t('仅供参考,以实际扣费为准')}</p>
|
||||||
</article>
|
</article>
|
||||||
@@ -349,6 +377,8 @@ export function renderModelPriceSimple(
|
|||||||
modelRatio,
|
modelRatio,
|
||||||
modelPrice = -1,
|
modelPrice = -1,
|
||||||
groupRatio,
|
groupRatio,
|
||||||
|
cacheTokens = 0,
|
||||||
|
cacheRatio = 1.0,
|
||||||
) {
|
) {
|
||||||
if (modelPrice !== -1) {
|
if (modelPrice !== -1) {
|
||||||
return i18next.t('价格:${{price}} * 分组:{{ratio}}', {
|
return i18next.t('价格:${{price}} * 分组:{{ratio}}', {
|
||||||
@@ -356,10 +386,18 @@ export function renderModelPriceSimple(
|
|||||||
ratio: groupRatio
|
ratio: groupRatio
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}}', {
|
if (cacheTokens !== 0) {
|
||||||
ratio: modelRatio,
|
return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}} * 缓存比例: {{cacheRatio}}', {
|
||||||
groupRatio: groupRatio
|
ratio: modelRatio,
|
||||||
});
|
groupRatio: groupRatio,
|
||||||
|
cacheRatio: cacheRatio
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}}', {
|
||||||
|
ratio: modelRatio,
|
||||||
|
groupRatio: groupRatio
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -374,6 +412,8 @@ export function renderAudioModelPrice(
|
|||||||
audioRatio,
|
audioRatio,
|
||||||
audioCompletionRatio,
|
audioCompletionRatio,
|
||||||
groupRatio,
|
groupRatio,
|
||||||
|
cacheTokens = 0,
|
||||||
|
cacheRatio = 1.0,
|
||||||
) {
|
) {
|
||||||
// 1 ratio = $0.002 / 1K tokens
|
// 1 ratio = $0.002 / 1K tokens
|
||||||
if (modelPrice !== -1) {
|
if (modelPrice !== -1) {
|
||||||
@@ -388,8 +428,13 @@ export function renderAudioModelPrice(
|
|||||||
// 这里的 *2 是因为 1倍率=0.002刀,请勿删除
|
// 这里的 *2 是因为 1倍率=0.002刀,请勿删除
|
||||||
let inputRatioPrice = modelRatio * 2.0;
|
let inputRatioPrice = modelRatio * 2.0;
|
||||||
let completionRatioPrice = modelRatio * 2.0 * completionRatio;
|
let completionRatioPrice = modelRatio * 2.0 * completionRatio;
|
||||||
|
let cacheRatioPrice = modelRatio * 2.0 * cacheRatio;
|
||||||
|
|
||||||
|
// Calculate effective input tokens (non-cached + cached with ratio applied)
|
||||||
|
const effectiveInputTokens = (inputTokens - cacheTokens) + (cacheTokens * cacheRatio);
|
||||||
|
|
||||||
let price =
|
let price =
|
||||||
(inputTokens / 1000000) * inputRatioPrice * groupRatio +
|
(effectiveInputTokens / 1000000) * inputRatioPrice * groupRatio +
|
||||||
(completionTokens / 1000000) * completionRatioPrice * groupRatio +
|
(completionTokens / 1000000) * completionRatioPrice * groupRatio +
|
||||||
(audioInputTokens / 1000000) * inputRatioPrice * audioRatio * groupRatio +
|
(audioInputTokens / 1000000) * inputRatioPrice * audioRatio * groupRatio +
|
||||||
(audioCompletionTokens / 1000000) * inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio;
|
(audioCompletionTokens / 1000000) * inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio;
|
||||||
@@ -406,6 +451,14 @@ export function renderAudioModelPrice(
|
|||||||
ratio: groupRatio,
|
ratio: groupRatio,
|
||||||
total: completionRatioPrice * groupRatio
|
total: completionRatioPrice * groupRatio
|
||||||
})}</p>
|
})}</p>
|
||||||
|
{cacheTokens > 0 && (
|
||||||
|
<p>{i18next.t('缓存:${{price}} * {{ratio}} = ${{total}} / 1M tokens (缓存比例: {{cacheRatio}})', {
|
||||||
|
price: cacheRatioPrice,
|
||||||
|
ratio: groupRatio,
|
||||||
|
total: cacheRatioPrice * groupRatio,
|
||||||
|
cacheRatio: cacheRatio
|
||||||
|
})}</p>
|
||||||
|
)}
|
||||||
<p>{i18next.t('音频提示:${{price}} * {{ratio}} * {{audioRatio}} = ${{total}} / 1M tokens', {
|
<p>{i18next.t('音频提示:${{price}} * {{ratio}} * {{audioRatio}} = ${{total}} / 1M tokens', {
|
||||||
price: inputRatioPrice,
|
price: inputRatioPrice,
|
||||||
ratio: groupRatio,
|
ratio: groupRatio,
|
||||||
@@ -420,12 +473,22 @@ export function renderAudioModelPrice(
|
|||||||
total: inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio
|
total: inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio
|
||||||
})}</p>
|
})}</p>
|
||||||
<p>
|
<p>
|
||||||
{i18next.t('文字提示 {{input}} tokens / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', {
|
{cacheTokens > 0 ?
|
||||||
input: inputTokens,
|
i18next.t('文字提示 {{nonCacheInput}} tokens + 文字缓存 {{cacheInput}} tokens * {{cacheRatio}} / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', {
|
||||||
price: inputRatioPrice,
|
nonCacheInput: inputTokens - cacheTokens,
|
||||||
completion: completionTokens,
|
cacheInput: cacheTokens,
|
||||||
compPrice: completionRatioPrice
|
cacheRatio: cacheRatio,
|
||||||
})}
|
price: inputRatioPrice,
|
||||||
|
completion: completionTokens,
|
||||||
|
compPrice: completionRatioPrice
|
||||||
|
}) :
|
||||||
|
i18next.t('文字提示 {{input}} tokens / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', {
|
||||||
|
input: inputTokens,
|
||||||
|
price: inputRatioPrice,
|
||||||
|
completion: completionTokens,
|
||||||
|
compPrice: completionRatioPrice
|
||||||
|
})
|
||||||
|
}
|
||||||
</p>
|
</p>
|
||||||
<p>
|
<p>
|
||||||
{i18next.t('音频提示 {{input}} tokens / 1M tokens * ${{price}} * {{audioRatio}} + 音频补全 {{completion}} tokens / 1M tokens * ${{price}} * {{audioRatio}} * {{audioCompRatio}}', {
|
{i18next.t('音频提示 {{input}} tokens / 1M tokens * ${{price}} * {{audioRatio}} + 音频补全 {{completion}} tokens / 1M tokens * ${{price}} * {{audioRatio}} * {{audioCompRatio}}', {
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ export default function ModelRatioSettings(props) {
|
|||||||
const [inputs, setInputs] = useState({
|
const [inputs, setInputs] = useState({
|
||||||
ModelPrice: '',
|
ModelPrice: '',
|
||||||
ModelRatio: '',
|
ModelRatio: '',
|
||||||
|
CacheRatio: '',
|
||||||
CompletionRatio: '',
|
CompletionRatio: '',
|
||||||
});
|
});
|
||||||
const refForm = useRef();
|
const refForm = useRef();
|
||||||
@@ -139,6 +140,25 @@ export default function ModelRatioSettings(props) {
|
|||||||
/>
|
/>
|
||||||
</Col>
|
</Col>
|
||||||
</Row>
|
</Row>
|
||||||
|
<Row gutter={16}>
|
||||||
|
<Col span={16}>
|
||||||
|
<Form.TextArea
|
||||||
|
label={t('提示缓存倍率')}
|
||||||
|
placeholder={t('为一个 JSON 文本,键为模型名称,值为倍率')}
|
||||||
|
field={'CacheRatio'}
|
||||||
|
autosize={{ minRows: 6, maxRows: 12 }}
|
||||||
|
trigger='blur'
|
||||||
|
stopValidateWithError
|
||||||
|
rules={[
|
||||||
|
{
|
||||||
|
validator: (rule, value) => verifyJSON(value),
|
||||||
|
message: '不是合法的 JSON 字符串'
|
||||||
|
}
|
||||||
|
]}
|
||||||
|
onChange={(value) => setInputs({ ...inputs, CacheRatio: value })}
|
||||||
|
/>
|
||||||
|
</Col>
|
||||||
|
</Row>
|
||||||
<Row gutter={16}>
|
<Row gutter={16}>
|
||||||
<Col span={16}>
|
<Col span={16}>
|
||||||
<Form.TextArea
|
<Form.TextArea
|
||||||
|
|||||||
Reference in New Issue
Block a user