feat: Implement cache token ratio for more precise token pricing

This commit is contained in:
1808837298@qq.com
2025-03-08 01:30:50 +08:00
parent 81137e0533
commit 4f194f4e6a
18 changed files with 258 additions and 71 deletions

View File

@@ -158,7 +158,7 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
tok := time.Now() tok := time.Now()
milliseconds := tok.Sub(tik).Milliseconds() milliseconds := tok.Sub(tik).Milliseconds()
consumedTime := float64(milliseconds) / 1000.0 consumedTime := float64(milliseconds) / 1000.0
other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio, priceData.ModelPrice) other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio, 0, 0.0, priceData.ModelPrice)
model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, info.OriginModelName, "模型测试", model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, info.OriginModelName, "模型测试",
quota, "模型测试", 0, quota, int(consumedTime), false, info.Group, other) quota, "模型测试", 0, quota, int(consumedTime), false, info.Group, other)
common.SysLog(fmt.Sprintf("testing channel #%d, response: \n%s", channel.Id, string(respBody))) common.SysLog(fmt.Sprintf("testing channel #%d, response: \n%s", channel.Id, string(respBody)))

View File

@@ -4,6 +4,7 @@ import (
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"one-api/model" "one-api/model"
"one-api/setting" "one-api/setting"
"one-api/setting/operation_setting"
) )
func GetPricing(c *gin.Context) { func GetPricing(c *gin.Context) {
@@ -39,7 +40,7 @@ func GetPricing(c *gin.Context) {
} }
func ResetModelRatio(c *gin.Context) { func ResetModelRatio(c *gin.Context) {
defaultStr := setting.DefaultModelRatio2JSONString() defaultStr := operation_setting.DefaultModelRatio2JSONString()
err := model.UpdateOption("ModelRatio", defaultStr) err := model.UpdateOption("ModelRatio", defaultStr)
if err != nil { if err != nil {
c.JSON(200, gin.H{ c.JSON(200, gin.H{
@@ -48,7 +49,7 @@ func ResetModelRatio(c *gin.Context) {
}) })
return return
} }
err = setting.UpdateModelRatioByJSONString(defaultStr) err = operation_setting.UpdateModelRatioByJSONString(defaultStr)
if err != nil { if err != nil {
c.JSON(200, gin.H{ c.JSON(200, gin.H{
"success": false, "success": false,

View File

@@ -92,11 +92,12 @@ func InitOptionMap() {
common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount) common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes) common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount) common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
common.OptionMap["ModelRatio"] = setting.ModelRatio2JSONString() common.OptionMap["ModelRatio"] = operation_setting.ModelRatio2JSONString()
common.OptionMap["ModelPrice"] = setting.ModelPrice2JSONString() common.OptionMap["ModelPrice"] = operation_setting.ModelPrice2JSONString()
common.OptionMap["CacheRatio"] = operation_setting.CacheRatio2JSONString()
common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString() common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString()
common.OptionMap["UserUsableGroups"] = setting.UserUsableGroups2JSONString() common.OptionMap["UserUsableGroups"] = setting.UserUsableGroups2JSONString()
common.OptionMap["CompletionRatio"] = setting.CompletionRatio2JSONString() common.OptionMap["CompletionRatio"] = operation_setting.CompletionRatio2JSONString()
common.OptionMap["TopUpLink"] = common.TopUpLink common.OptionMap["TopUpLink"] = common.TopUpLink
common.OptionMap["ChatLink"] = common.ChatLink common.OptionMap["ChatLink"] = common.ChatLink
common.OptionMap["ChatLink2"] = common.ChatLink2 common.OptionMap["ChatLink2"] = common.ChatLink2
@@ -344,15 +345,17 @@ func updateOptionMap(key string, value string) (err error) {
case "DataExportDefaultTime": case "DataExportDefaultTime":
common.DataExportDefaultTime = value common.DataExportDefaultTime = value
case "ModelRatio": case "ModelRatio":
err = setting.UpdateModelRatioByJSONString(value) err = operation_setting.UpdateModelRatioByJSONString(value)
case "GroupRatio": case "GroupRatio":
err = setting.UpdateGroupRatioByJSONString(value) err = setting.UpdateGroupRatioByJSONString(value)
case "UserUsableGroups": case "UserUsableGroups":
err = setting.UpdateUserUsableGroupsByJSONString(value) err = setting.UpdateUserUsableGroupsByJSONString(value)
case "CompletionRatio": case "CompletionRatio":
err = setting.UpdateCompletionRatioByJSONString(value) err = operation_setting.UpdateCompletionRatioByJSONString(value)
case "ModelPrice": case "ModelPrice":
err = setting.UpdateModelPriceByJSONString(value) err = operation_setting.UpdateModelPriceByJSONString(value)
case "CacheRatio":
err = operation_setting.UpdateCacheRatioByJSONString(value)
case "TopUpLink": case "TopUpLink":
common.TopUpLink = value common.TopUpLink = value
case "ChatLink": case "ChatLink":

View File

@@ -2,7 +2,7 @@ package model
import ( import (
"one-api/common" "one-api/common"
"one-api/setting" "one-api/setting/operation_setting"
"sync" "sync"
"time" "time"
) )
@@ -65,14 +65,14 @@ func updatePricing() {
ModelName: model, ModelName: model,
EnableGroup: groups, EnableGroup: groups,
} }
modelPrice, findPrice := setting.GetModelPrice(model, false) modelPrice, findPrice := operation_setting.GetModelPrice(model, false)
if findPrice { if findPrice {
pricing.ModelPrice = modelPrice pricing.ModelPrice = modelPrice
pricing.QuotaType = 1 pricing.QuotaType = 1
} else { } else {
modelRatio, _ := setting.GetModelRatio(model) modelRatio, _ := operation_setting.GetModelRatio(model)
pricing.ModelRatio = modelRatio pricing.ModelRatio = modelRatio
pricing.CompletionRatio = setting.GetCompletionRatio(model) pricing.CompletionRatio = operation_setting.GetCompletionRatio(model)
pricing.QuotaType = 0 pricing.QuotaType = 0
} }
pricingMap = append(pricingMap, pricing) pricingMap = append(pricingMap, pricing)

View File

@@ -6,30 +6,33 @@ import (
"one-api/common" "one-api/common"
relaycommon "one-api/relay/common" relaycommon "one-api/relay/common"
"one-api/setting" "one-api/setting"
"one-api/setting/operation_setting"
) )
type PriceData struct { type PriceData struct {
ModelPrice float64 ModelPrice float64
ModelRatio float64 ModelRatio float64
CompletionRatio float64 CompletionRatio float64
CacheRatio float64
GroupRatio float64 GroupRatio float64
UsePrice bool UsePrice bool
ShouldPreConsumedQuota int ShouldPreConsumedQuota int
} }
func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) (PriceData, error) { func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) (PriceData, error) {
modelPrice, usePrice := setting.GetModelPrice(info.OriginModelName, false) modelPrice, usePrice := operation_setting.GetModelPrice(info.OriginModelName, false)
groupRatio := setting.GetGroupRatio(info.Group) groupRatio := setting.GetGroupRatio(info.Group)
var preConsumedQuota int var preConsumedQuota int
var modelRatio float64 var modelRatio float64
var completionRatio float64 var completionRatio float64
var cacheRatio float64
if !usePrice { if !usePrice {
preConsumedTokens := common.PreConsumedQuota preConsumedTokens := common.PreConsumedQuota
if maxTokens != 0 { if maxTokens != 0 {
preConsumedTokens = promptTokens + maxTokens preConsumedTokens = promptTokens + maxTokens
} }
var success bool var success bool
modelRatio, success = setting.GetModelRatio(info.OriginModelName) modelRatio, success = operation_setting.GetModelRatio(info.OriginModelName)
if !success { if !success {
if info.UserId == 1 { if info.UserId == 1 {
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置请设置或开始自用模式Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName) return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置请设置或开始自用模式Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName)
@@ -37,7 +40,8 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName) return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName)
} }
} }
completionRatio = setting.GetCompletionRatio(info.OriginModelName) completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName)
cacheRatio, _ = operation_setting.GetCacheRatio(info.OriginModelName)
ratio := modelRatio * groupRatio ratio := modelRatio * groupRatio
preConsumedQuota = int(float64(preConsumedTokens) * ratio) preConsumedQuota = int(float64(preConsumedTokens) * ratio)
} else { } else {
@@ -49,6 +53,7 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
CompletionRatio: completionRatio, CompletionRatio: completionRatio,
GroupRatio: groupRatio, GroupRatio: groupRatio,
UsePrice: usePrice, UsePrice: usePrice,
CacheRatio: cacheRatio,
ShouldPreConsumedQuota: preConsumedQuota, ShouldPreConsumedQuota: preConsumedQuota,
}, nil }, nil
} }

View File

@@ -15,6 +15,7 @@ import (
relayconstant "one-api/relay/constant" relayconstant "one-api/relay/constant"
"one-api/service" "one-api/service"
"one-api/setting" "one-api/setting"
"one-api/setting/operation_setting"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@@ -157,10 +158,10 @@ func RelaySwapFace(c *gin.Context) *dto.MidjourneyResponse {
return service.MidjourneyErrorWrapper(constant.MjRequestError, "sour_base64_and_target_base64_is_required") return service.MidjourneyErrorWrapper(constant.MjRequestError, "sour_base64_and_target_base64_is_required")
} }
modelName := service.CoverActionToModelName(constant.MjActionSwapFace) modelName := service.CoverActionToModelName(constant.MjActionSwapFace)
modelPrice, success := setting.GetModelPrice(modelName, true) modelPrice, success := operation_setting.GetModelPrice(modelName, true)
// 如果没有配置价格,则使用默认价格 // 如果没有配置价格,则使用默认价格
if !success { if !success {
defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName] defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
if !ok { if !ok {
modelPrice = 0.1 modelPrice = 0.1
} else { } else {
@@ -463,10 +464,10 @@ func RelayMidjourneySubmit(c *gin.Context, relayMode int) *dto.MidjourneyRespons
fullRequestURL := fmt.Sprintf("%s%s", baseURL, requestURL) fullRequestURL := fmt.Sprintf("%s%s", baseURL, requestURL)
modelName := service.CoverActionToModelName(midjRequest.Action) modelName := service.CoverActionToModelName(midjRequest.Action)
modelPrice, success := setting.GetModelPrice(modelName, true) modelPrice, success := operation_setting.GetModelPrice(modelName, true)
// 如果没有配置价格,则使用默认价格 // 如果没有配置价格,则使用默认价格
if !success { if !success {
defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName] defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
if !ok { if !ok {
modelPrice = 0.1 modelPrice = 0.1
} else { } else {

View File

@@ -304,24 +304,26 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
CompletionTokens: 0, CompletionTokens: 0,
TotalTokens: relayInfo.PromptTokens, TotalTokens: relayInfo.PromptTokens,
} }
extraContent += " (可能是请求出错)" extraContent += "(可能是请求出错)"
} }
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix() useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
promptTokens := usage.PromptTokens promptTokens := usage.PromptTokens
cacheTokens := usage.PromptTokensDetails.CachedTokens
completionTokens := usage.CompletionTokens completionTokens := usage.CompletionTokens
modelName := relayInfo.OriginModelName modelName := relayInfo.OriginModelName
tokenName := ctx.GetString("token_name") tokenName := ctx.GetString("token_name")
completionRatio := setting.GetCompletionRatio(modelName) completionRatio := priceData.CompletionRatio
cacheRatio := priceData.CacheRatio
ratio := priceData.ModelRatio * priceData.GroupRatio ratio := priceData.ModelRatio * priceData.GroupRatio
modelRatio := priceData.ModelRatio modelRatio := priceData.ModelRatio
groupRatio := priceData.GroupRatio groupRatio := priceData.GroupRatio
modelPrice := priceData.ModelPrice modelPrice := priceData.ModelPrice
usePrice := priceData.UsePrice
quota := 0 quota := 0
if !priceData.UsePrice { if !priceData.UsePrice {
quota = promptTokens + int(math.Round(float64(completionTokens)*completionRatio)) quota = (promptTokens - cacheTokens) + int(math.Round(float64(cacheTokens)*cacheRatio))
quota += int(math.Round(float64(completionTokens) * completionRatio))
quota = int(math.Round(float64(quota) * ratio)) quota = int(math.Round(float64(quota) * ratio))
if ratio != 0 && quota <= 0 { if ratio != 0 && quota <= 0 {
quota = 1 quota = 1
@@ -330,8 +332,9 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
quota = int(modelPrice * common.QuotaPerUnit * groupRatio) quota = int(modelPrice * common.QuotaPerUnit * groupRatio)
} }
totalTokens := promptTokens + completionTokens totalTokens := promptTokens + completionTokens
var logContent string var logContent string
if !usePrice { if !priceData.UsePrice {
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio) logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio)
} else { } else {
logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio) logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio)
@@ -372,7 +375,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
if extraContent != "" { if extraContent != "" {
logContent += ", " + extraContent logContent += ", " + extraContent
} }
other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice) other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice)
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel, model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel,
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other) tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)

View File

@@ -16,6 +16,7 @@ import (
relayconstant "one-api/relay/constant" relayconstant "one-api/relay/constant"
"one-api/service" "one-api/service"
"one-api/setting" "one-api/setting"
"one-api/setting/operation_setting"
) )
/* /*
@@ -37,9 +38,9 @@ func RelayTaskSubmit(c *gin.Context, relayMode int) (taskErr *dto.TaskError) {
} }
modelName := service.CoverTaskActionToModelName(platform, relayInfo.Action) modelName := service.CoverTaskActionToModelName(platform, relayInfo.Action)
modelPrice, success := setting.GetModelPrice(modelName, true) modelPrice, success := operation_setting.GetModelPrice(modelName, true)
if !success { if !success {
defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName] defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
if !ok { if !ok {
modelPrice = 0.1 modelPrice = 0.1
} else { } else {

View File

@@ -11,6 +11,7 @@ import (
relaycommon "one-api/relay/common" relaycommon "one-api/relay/common"
"one-api/service" "one-api/service"
"one-api/setting" "one-api/setting"
"one-api/setting/operation_setting"
) )
func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWithStatusCode) { func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWithStatusCode) {
@@ -39,7 +40,7 @@ func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWi
} }
} }
//relayInfo.UpstreamModelName = textRequest.Model //relayInfo.UpstreamModelName = textRequest.Model
modelPrice, getModelPriceSuccess := setting.GetModelPrice(relayInfo.UpstreamModelName, false) modelPrice, getModelPriceSuccess := operation_setting.GetModelPrice(relayInfo.UpstreamModelName, false)
groupRatio := setting.GetGroupRatio(relayInfo.Group) groupRatio := setting.GetGroupRatio(relayInfo.Group)
var preConsumedQuota int var preConsumedQuota int
@@ -65,7 +66,7 @@ func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWi
//if realtimeEvent.Session.MaxResponseOutputTokens != 0 { //if realtimeEvent.Session.MaxResponseOutputTokens != 0 {
// preConsumedTokens = promptTokens + int(realtimeEvent.Session.MaxResponseOutputTokens) // preConsumedTokens = promptTokens + int(realtimeEvent.Session.MaxResponseOutputTokens)
//} //}
modelRatio, _ = setting.GetModelRatio(relayInfo.UpstreamModelName) modelRatio, _ = operation_setting.GetModelRatio(relayInfo.UpstreamModelName)
ratio = modelRatio * groupRatio ratio = modelRatio * groupRatio
preConsumedQuota = int(float64(preConsumedTokens) * ratio) preConsumedQuota = int(float64(preConsumedTokens) * ratio)
} else { } else {

View File

@@ -1,16 +1,20 @@
package service package service
import ( import (
"github.com/gin-gonic/gin"
"one-api/dto" "one-api/dto"
relaycommon "one-api/relay/common" relaycommon "one-api/relay/common"
"github.com/gin-gonic/gin"
) )
func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio, modelPrice float64) map[string]interface{} { func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio float64,
cacheTokens int, cacheRatio float64, modelPrice float64) map[string]interface{} {
other := make(map[string]interface{}) other := make(map[string]interface{})
other["model_ratio"] = modelRatio other["model_ratio"] = modelRatio
other["group_ratio"] = groupRatio other["group_ratio"] = groupRatio
other["completion_ratio"] = completionRatio other["completion_ratio"] = completionRatio
other["cache_tokens"] = cacheTokens
other["cache_ratio"] = cacheRatio
other["model_price"] = modelPrice other["model_price"] = modelPrice
other["frt"] = float64(relayInfo.FirstResponseTime.UnixMilli() - relayInfo.StartTime.UnixMilli()) other["frt"] = float64(relayInfo.FirstResponseTime.UnixMilli() - relayInfo.StartTime.UnixMilli())
if relayInfo.ReasoningEffort != "" { if relayInfo.ReasoningEffort != "" {
@@ -27,7 +31,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
} }
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} { func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice) info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice)
info["ws"] = true info["ws"] = true
info["audio_input"] = usage.InputTokenDetails.AudioTokens info["audio_input"] = usage.InputTokenDetails.AudioTokens
info["audio_output"] = usage.OutputTokenDetails.AudioTokens info["audio_output"] = usage.OutputTokenDetails.AudioTokens
@@ -39,7 +43,7 @@ func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us
} }
func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} { func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice) info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice)
info["audio"] = true info["audio"] = true
info["audio_input"] = usage.PromptTokensDetails.AudioTokens info["audio_input"] = usage.PromptTokensDetails.AudioTokens
info["audio_output"] = usage.CompletionTokenDetails.AudioTokens info["audio_output"] = usage.CompletionTokenDetails.AudioTokens

View File

@@ -12,6 +12,7 @@ import (
relaycommon "one-api/relay/common" relaycommon "one-api/relay/common"
"one-api/relay/helper" "one-api/relay/helper"
"one-api/setting" "one-api/setting"
"one-api/setting/operation_setting"
"strings" "strings"
"time" "time"
@@ -38,9 +39,9 @@ func calculateAudioQuota(info QuotaInfo) int {
return int(info.ModelPrice * common.QuotaPerUnit * info.GroupRatio) return int(info.ModelPrice * common.QuotaPerUnit * info.GroupRatio)
} }
completionRatio := setting.GetCompletionRatio(info.ModelName) completionRatio := operation_setting.GetCompletionRatio(info.ModelName)
audioRatio := setting.GetAudioRatio(info.ModelName) audioRatio := operation_setting.GetAudioRatio(info.ModelName)
audioCompletionRatio := setting.GetAudioCompletionRatio(info.ModelName) audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName)
ratio := info.GroupRatio * info.ModelRatio ratio := info.GroupRatio * info.ModelRatio
quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio)) quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio))
@@ -75,7 +76,7 @@ func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usag
audioInputTokens := usage.InputTokenDetails.AudioTokens audioInputTokens := usage.InputTokenDetails.AudioTokens
audioOutTokens := usage.OutputTokenDetails.AudioTokens audioOutTokens := usage.OutputTokenDetails.AudioTokens
groupRatio := setting.GetGroupRatio(relayInfo.Group) groupRatio := setting.GetGroupRatio(relayInfo.Group)
modelRatio, _ := setting.GetModelRatio(modelName) modelRatio, _ := operation_setting.GetModelRatio(modelName)
quotaInfo := QuotaInfo{ quotaInfo := QuotaInfo{
InputDetails: TokenDetails{ InputDetails: TokenDetails{
@@ -122,9 +123,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
audioOutTokens := usage.OutputTokenDetails.AudioTokens audioOutTokens := usage.OutputTokenDetails.AudioTokens
tokenName := ctx.GetString("token_name") tokenName := ctx.GetString("token_name")
completionRatio := setting.GetCompletionRatio(modelName) completionRatio := operation_setting.GetCompletionRatio(modelName)
audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName) audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName)
audioCompletionRatio := setting.GetAudioCompletionRatio(modelName) audioCompletionRatio := operation_setting.GetAudioCompletionRatio(modelName)
quotaInfo := QuotaInfo{ quotaInfo := QuotaInfo{
InputDetails: TokenDetails{ InputDetails: TokenDetails{
@@ -184,9 +185,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
audioOutTokens := usage.CompletionTokenDetails.AudioTokens audioOutTokens := usage.CompletionTokenDetails.AudioTokens
tokenName := ctx.GetString("token_name") tokenName := ctx.GetString("token_name")
completionRatio := setting.GetCompletionRatio(relayInfo.OriginModelName) completionRatio := operation_setting.GetCompletionRatio(relayInfo.OriginModelName)
audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName) audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName)
audioCompletionRatio := setting.GetAudioCompletionRatio(relayInfo.OriginModelName) audioCompletionRatio := operation_setting.GetAudioCompletionRatio(relayInfo.OriginModelName)
modelRatio := priceData.ModelRatio modelRatio := priceData.ModelRatio
groupRatio := priceData.GroupRatio groupRatio := priceData.GroupRatio

View File

@@ -10,7 +10,7 @@ import (
"one-api/constant" "one-api/constant"
"one-api/dto" "one-api/dto"
relaycommon "one-api/relay/common" relaycommon "one-api/relay/common"
"one-api/setting" "one-api/setting/operation_setting"
"strings" "strings"
"unicode/utf8" "unicode/utf8"
@@ -33,7 +33,7 @@ func InitTokenEncoders() {
if err != nil { if err != nil {
common.FatalLog(fmt.Sprintf("failed to get gpt-4o token encoder: %s", err.Error())) common.FatalLog(fmt.Sprintf("failed to get gpt-4o token encoder: %s", err.Error()))
} }
for model, _ := range setting.GetDefaultModelRatioMap() { for model, _ := range operation_setting.GetDefaultModelRatioMap() {
if strings.HasPrefix(model, "gpt-3.5") { if strings.HasPrefix(model, "gpt-3.5") {
tokenEncoderMap[model] = cl100TokenEncoder tokenEncoderMap[model] = cl100TokenEncoder
} else if strings.HasPrefix(model, "gpt-4") { } else if strings.HasPrefix(model, "gpt-4") {

View File

@@ -0,0 +1,77 @@
package operation_setting
import (
"encoding/json"
"one-api/common"
"sync"
)
// defaultCacheRatio is the built-in cache ratio table, keyed by model name.
// Every entry shipped here is 0.5. GetCacheRatioMap installs this table as
// the active one when no table has been loaded yet.
var defaultCacheRatio = map[string]float64{
"gpt-4": 0.5,
"o1-2024-12-17": 0.5,
"o1-preview-2024-09-12": 0.5,
"o1-mini-2024-09-12": 0.5,
"gpt-4o-2024-11-20": 0.5,
"gpt-4o-2024-08-06": 0.5,
"gpt-4o-mini-2024-07-18": 0.5,
"gpt-4o-realtime-preview": 0.5,
"gpt-4o-mini-realtime-preview": 0.5,
"deepseek-chat": 0.5,
"deepseek-reasoner": 0.5,
"deepseek-coder": 0.5,
}
// cacheRatioMap is the active cache ratio table. It stays nil until
// GetCacheRatioMap or UpdateCacheRatioByJSONString assigns it.
var cacheRatioMap map[string]float64
// cacheRatioMapMutex is held by GetCacheRatioMap and
// UpdateCacheRatioByJSONString while they assign cacheRatioMap.
var cacheRatioMapMutex sync.RWMutex
// GetCacheRatioMap returns the active cache ratio table, installing the
// built-in defaults first when no table has been loaded yet. The returned
// map is the live table itself, not a copy.
func GetCacheRatioMap() map[string]float64 {
	cacheRatioMapMutex.Lock()
	// Lazy initialisation: fall back to the defaults on first use.
	if cacheRatioMap == nil {
		cacheRatioMap = defaultCacheRatio
	}
	active := cacheRatioMap
	cacheRatioMapMutex.Unlock()
	return active
}
// CacheRatio2JSONString serialises the active cache ratio table to a JSON
// string. The defaults are installed first if no table has been loaded.
// If marshalling fails the error is logged and an empty string is returned.
func CacheRatio2JSONString() string {
	// Trigger lazy initialisation; GetCacheRatioMap releases its lock on return.
	GetCacheRatioMap()

	// Hold the read lock while marshalling so a concurrent
	// UpdateCacheRatioByJSONString cannot swap or fill the map mid-read.
	cacheRatioMapMutex.RLock()
	defer cacheRatioMapMutex.RUnlock()

	jsonBytes, err := json.Marshal(cacheRatioMap)
	if err != nil {
		common.SysError("error marshalling cache ratio: " + err.Error())
	}
	return string(jsonBytes)
}
// UpdateCacheRatioByJSONString replaces the active cache ratio table with the
// one decoded from jsonStr, which must be a JSON object mapping model names
// to numeric ratios.
//
// The input is decoded into a temporary map and swapped in only on success.
// Malformed or partially valid JSON therefore leaves the current table
// untouched, and the decoding error is returned to the caller.
func UpdateCacheRatioByJSONString(jsonStr string) error {
	parsed := make(map[string]float64)
	if err := json.Unmarshal([]byte(jsonStr), &parsed); err != nil {
		return err
	}

	cacheRatioMapMutex.Lock()
	defer cacheRatioMapMutex.Unlock()
	// A JSON "null" decodes to a nil map; GetCacheRatioMap then falls back to
	// the defaults on its next call.
	cacheRatioMap = parsed
	return nil
}
// GetCacheRatio looks up the cache ratio configured for the model called name.
//
// It returns the configured ratio and true when the model has an entry.
// Otherwise it returns the fallback ratio 0.5 and false.
func GetCacheRatio(name string) (float64, bool) {
	// Trigger lazy initialisation; GetCacheRatioMap releases its lock on return.
	GetCacheRatioMap()

	// Read under the shared lock: an unguarded map read racing with
	// UpdateCacheRatioByJSONString is a concurrent map read/write.
	cacheRatioMapMutex.RLock()
	defer cacheRatioMapMutex.RUnlock()

	ratio, ok := cacheRatioMap[name]
	if !ok {
		return 0.5, false // Default to 0.5 if not found
	}
	return ratio, true
}
// DefaultCacheRatio2JSONString serialises the built-in default cache ratio
// table to a JSON string. If marshalling fails the error is logged and an
// empty string is returned.
func DefaultCacheRatio2JSONString() string {
	encoded, marshalErr := json.Marshal(defaultCacheRatio)
	if marshalErr != nil {
		common.SysError("error marshalling default cache ratio: " + marshalErr.Error())
	}
	result := string(encoded)
	return result
}
// GetDefaultCacheRatioMap exposes the built-in default cache ratio table.
// The returned map is the package's own table, not a copy.
func GetDefaultCacheRatioMap() map[string]float64 {
	defaults := defaultCacheRatio
	return defaults
}

View File

@@ -1,9 +1,8 @@
package setting package operation_setting
import ( import (
"encoding/json" "encoding/json"
"one-api/common" "one-api/common"
"one-api/setting/operation_setting"
"strings" "strings"
"sync" "sync"
) )
@@ -326,7 +325,7 @@ func GetModelRatio(name string) (float64, bool) {
} }
ratio, ok := modelRatioMap[name] ratio, ok := modelRatioMap[name]
if !ok { if !ok {
return 37.5, operation_setting.SelfUseModeEnabled return 37.5, SelfUseModeEnabled
} }
return ratio, true return ratio, true
} }

View File

@@ -464,6 +464,8 @@ const LogsTable = () => {
other.model_ratio, other.model_ratio,
other.model_price, other.model_price,
other.group_ratio, other.group_ratio,
other.cache_tokens || 0,
other.cache_ratio || 1.0,
); );
return ( return (
<Paragraph <Paragraph
@@ -665,6 +667,8 @@ const LogsTable = () => {
other?.audio_ratio, other?.audio_ratio,
other?.audio_completion_ratio, other?.audio_completion_ratio,
other.group_ratio, other.group_ratio,
other.cache_tokens || 0,
other.cache_ratio || 1.0,
); );
} else { } else {
content = renderModelPrice( content = renderModelPrice(
@@ -674,6 +678,8 @@ const LogsTable = () => {
other.model_price, other.model_price,
other.completion_ratio, other.completion_ratio,
other.group_ratio, other.group_ratio,
other.cache_tokens || 0,
other.cache_ratio || 1.0,
); );
} }
expandDataLocal.push({ expandDataLocal.push({

View File

@@ -28,6 +28,7 @@ const OperationSetting = () => {
PreConsumedQuota: 0, PreConsumedQuota: 0,
StreamCacheQueueLength: 0, StreamCacheQueueLength: 0,
ModelRatio: '', ModelRatio: '',
CacheRatio: '',
CompletionRatio: '', CompletionRatio: '',
ModelPrice: '', ModelPrice: '',
GroupRatio: '', GroupRatio: '',
@@ -77,7 +78,8 @@ const OperationSetting = () => {
item.key === 'GroupRatio' || item.key === 'GroupRatio' ||
item.key === 'UserUsableGroups' || item.key === 'UserUsableGroups' ||
item.key === 'CompletionRatio' || item.key === 'CompletionRatio' ||
item.key === 'ModelPrice' item.key === 'ModelPrice' ||
item.key === 'CacheRatio'
) { ) {
item.value = JSON.stringify(JSON.parse(item.value), null, 2); item.value = JSON.stringify(JSON.parse(item.value), null, 2);
} }

View File

@@ -298,6 +298,8 @@ export function renderModelPrice(
modelPrice = -1, modelPrice = -1,
completionRatio, completionRatio,
groupRatio, groupRatio,
cacheTokens = 0,
cacheRatio = 1.0,
) { ) {
if (modelPrice !== -1) { if (modelPrice !== -1) {
return i18next.t('模型价格:${{price}} * 分组倍率:{{ratio}} = ${{total}}', { return i18next.t('模型价格:${{price}} * 分组倍率:{{ratio}} = ${{total}}', {
@@ -311,9 +313,15 @@ export function renderModelPrice(
} }
let inputRatioPrice = modelRatio * 2.0; let inputRatioPrice = modelRatio * 2.0;
let completionRatioPrice = modelRatio * 2.0 * completionRatio; let completionRatioPrice = modelRatio * 2.0 * completionRatio;
let cacheRatioPrice = modelRatio * 2.0 * cacheRatio;
// Calculate effective input tokens (non-cached + cached with ratio applied)
const effectiveInputTokens = (inputTokens - cacheTokens) + (cacheTokens * cacheRatio);
let price = let price =
(inputTokens / 1000000) * inputRatioPrice * groupRatio + (effectiveInputTokens / 1000000) * inputRatioPrice * groupRatio +
(completionTokens / 1000000) * completionRatioPrice * groupRatio; (completionTokens / 1000000) * completionRatioPrice * groupRatio;
return ( return (
<> <>
<article> <article>
@@ -327,16 +335,36 @@ export function renderModelPrice(
ratio: groupRatio, ratio: groupRatio,
total: completionRatioPrice * groupRatio total: completionRatioPrice * groupRatio
})}</p> })}</p>
{cacheTokens > 0 && (
<p>{i18next.t('缓存:${{price}} * {{ratio}} = ${{total}} / 1M tokens (缓存比例: {{cacheRatio}})', {
price: cacheRatioPrice,
ratio: groupRatio,
total: cacheRatioPrice * groupRatio,
cacheRatio: cacheRatio
})}</p>
)}
<p></p> <p></p>
<p> <p>
{i18next.t('提示 {{input}} tokens / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', { {cacheTokens > 0 ?
input: inputTokens, i18next.t('提示 {{nonCacheInput}} tokens + 缓存 {{cacheInput}} tokens * {{cacheRatio}} / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', {
price: inputRatioPrice, nonCacheInput: inputTokens - cacheTokens,
completion: completionTokens, cacheInput: cacheTokens,
compPrice: completionRatioPrice, cacheRatio: cacheRatio,
ratio: groupRatio, price: inputRatioPrice,
total: price.toFixed(6) completion: completionTokens,
})} compPrice: completionRatioPrice,
ratio: groupRatio,
total: price.toFixed(6)
}) :
i18next.t('提示 {{input}} tokens / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', {
input: inputTokens,
price: inputRatioPrice,
completion: completionTokens,
compPrice: completionRatioPrice,
ratio: groupRatio,
total: price.toFixed(6)
})
}
</p> </p>
<p>{i18next.t('仅供参考,以实际扣费为准')}</p> <p>{i18next.t('仅供参考,以实际扣费为准')}</p>
</article> </article>
@@ -349,6 +377,8 @@ export function renderModelPriceSimple(
modelRatio, modelRatio,
modelPrice = -1, modelPrice = -1,
groupRatio, groupRatio,
cacheTokens = 0,
cacheRatio = 1.0,
) { ) {
if (modelPrice !== -1) { if (modelPrice !== -1) {
return i18next.t('价格:${{price}} * 分组:{{ratio}}', { return i18next.t('价格:${{price}} * 分组:{{ratio}}', {
@@ -356,10 +386,18 @@ export function renderModelPriceSimple(
ratio: groupRatio ratio: groupRatio
}); });
} else { } else {
return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}}', { if (cacheTokens !== 0) {
ratio: modelRatio, return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}} * 缓存比例: {{cacheRatio}}', {
groupRatio: groupRatio ratio: modelRatio,
}); groupRatio: groupRatio,
cacheRatio: cacheRatio
});
} else {
return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}}', {
ratio: modelRatio,
groupRatio: groupRatio
});
}
} }
} }
@@ -374,6 +412,8 @@ export function renderAudioModelPrice(
audioRatio, audioRatio,
audioCompletionRatio, audioCompletionRatio,
groupRatio, groupRatio,
cacheTokens = 0,
cacheRatio = 1.0,
) { ) {
// 1 ratio = $0.002 / 1K tokens // 1 ratio = $0.002 / 1K tokens
if (modelPrice !== -1) { if (modelPrice !== -1) {
@@ -388,8 +428,13 @@ export function renderAudioModelPrice(
// 这里的 *2 是因为 1倍率=0.002刀,请勿删除 // 这里的 *2 是因为 1倍率=0.002刀,请勿删除
let inputRatioPrice = modelRatio * 2.0; let inputRatioPrice = modelRatio * 2.0;
let completionRatioPrice = modelRatio * 2.0 * completionRatio; let completionRatioPrice = modelRatio * 2.0 * completionRatio;
let cacheRatioPrice = modelRatio * 2.0 * cacheRatio;
// Calculate effective input tokens (non-cached + cached with ratio applied)
const effectiveInputTokens = (inputTokens - cacheTokens) + (cacheTokens * cacheRatio);
let price = let price =
(inputTokens / 1000000) * inputRatioPrice * groupRatio + (effectiveInputTokens / 1000000) * inputRatioPrice * groupRatio +
(completionTokens / 1000000) * completionRatioPrice * groupRatio + (completionTokens / 1000000) * completionRatioPrice * groupRatio +
(audioInputTokens / 1000000) * inputRatioPrice * audioRatio * groupRatio + (audioInputTokens / 1000000) * inputRatioPrice * audioRatio * groupRatio +
(audioCompletionTokens / 1000000) * inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio; (audioCompletionTokens / 1000000) * inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio;
@@ -406,6 +451,14 @@ export function renderAudioModelPrice(
ratio: groupRatio, ratio: groupRatio,
total: completionRatioPrice * groupRatio total: completionRatioPrice * groupRatio
})}</p> })}</p>
{cacheTokens > 0 && (
<p>{i18next.t('缓存:${{price}} * {{ratio}} = ${{total}} / 1M tokens (缓存比例: {{cacheRatio}})', {
price: cacheRatioPrice,
ratio: groupRatio,
total: cacheRatioPrice * groupRatio,
cacheRatio: cacheRatio
})}</p>
)}
<p>{i18next.t('音频提示:${{price}} * {{ratio}} * {{audioRatio}} = ${{total}} / 1M tokens', { <p>{i18next.t('音频提示:${{price}} * {{ratio}} * {{audioRatio}} = ${{total}} / 1M tokens', {
price: inputRatioPrice, price: inputRatioPrice,
ratio: groupRatio, ratio: groupRatio,
@@ -420,12 +473,22 @@ export function renderAudioModelPrice(
total: inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio total: inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio
})}</p> })}</p>
<p> <p>
{i18next.t('文字提示 {{input}} tokens / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', { {cacheTokens > 0 ?
input: inputTokens, i18next.t('文字提示 {{nonCacheInput}} tokens + 文字缓存 {{cacheInput}} tokens * {{cacheRatio}} / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', {
price: inputRatioPrice, nonCacheInput: inputTokens - cacheTokens,
completion: completionTokens, cacheInput: cacheTokens,
compPrice: completionRatioPrice cacheRatio: cacheRatio,
})} price: inputRatioPrice,
completion: completionTokens,
compPrice: completionRatioPrice
}) :
i18next.t('文字提示 {{input}} tokens / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', {
input: inputTokens,
price: inputRatioPrice,
completion: completionTokens,
compPrice: completionRatioPrice
})
}
</p> </p>
<p> <p>
{i18next.t('音频提示 {{input}} tokens / 1M tokens * ${{price}} * {{audioRatio}} + 音频补全 {{completion}} tokens / 1M tokens * ${{price}} * {{audioRatio}} * {{audioCompRatio}}', { {i18next.t('音频提示 {{input}} tokens / 1M tokens * ${{price}} * {{audioRatio}} + 音频补全 {{completion}} tokens / 1M tokens * ${{price}} * {{audioRatio}} * {{audioCompRatio}}', {

View File

@@ -15,6 +15,7 @@ export default function ModelRatioSettings(props) {
const [inputs, setInputs] = useState({ const [inputs, setInputs] = useState({
ModelPrice: '', ModelPrice: '',
ModelRatio: '', ModelRatio: '',
CacheRatio: '',
CompletionRatio: '', CompletionRatio: '',
}); });
const refForm = useRef(); const refForm = useRef();
@@ -139,6 +140,25 @@ export default function ModelRatioSettings(props) {
/> />
</Col> </Col>
</Row> </Row>
<Row gutter={16}>
<Col span={16}>
<Form.TextArea
label={t('提示缓存倍率')}
placeholder={t('为一个 JSON 文本,键为模型名称,值为倍率')}
field={'CacheRatio'}
autosize={{ minRows: 6, maxRows: 12 }}
trigger='blur'
stopValidateWithError
rules={[
{
validator: (rule, value) => verifyJSON(value),
message: '不是合法的 JSON 字符串'
}
]}
onChange={(value) => setInputs({ ...inputs, CacheRatio: value })}
/>
</Col>
</Row>
<Row gutter={16}> <Row gutter={16}>
<Col span={16}> <Col span={16}>
<Form.TextArea <Form.TextArea