From 4f194f4e6a1559d8eb5ecf5d0886c079a91e39cf Mon Sep 17 00:00:00 2001 From: "1808837298@qq.com" <1808837298@qq.com> Date: Sat, 8 Mar 2025 01:30:50 +0800 Subject: [PATCH] feat: Implement cache token ratio for more precise token pricing --- controller/channel-test.go | 2 +- controller/pricing.go | 5 +- model/option.go | 15 ++- model/pricing.go | 8 +- relay/helper/price.go | 11 +- relay/relay-mj.go | 9 +- relay/relay-text.go | 17 +-- relay/relay_task.go | 5 +- relay/websocket.go | 5 +- service/log_info_generate.go | 12 +- service/quota.go | 21 ++-- service/token_counter.go | 4 +- setting/operation_setting/cache_ratio.go | 77 +++++++++++++ .../{ => operation_setting}/model-ratio.go | 5 +- web/src/components/LogsTable.js | 6 + web/src/components/OperationSetting.js | 4 +- web/src/helpers/render.js | 103 ++++++++++++++---- .../Setting/Operation/ModelRatioSettings.js | 20 ++++ 18 files changed, 258 insertions(+), 71 deletions(-) create mode 100644 setting/operation_setting/cache_ratio.go rename setting/{ => operation_setting}/model-ratio.go (99%) diff --git a/controller/channel-test.go b/controller/channel-test.go index 98623a76..02a30593 100644 --- a/controller/channel-test.go +++ b/controller/channel-test.go @@ -158,7 +158,7 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr tok := time.Now() milliseconds := tok.Sub(tik).Milliseconds() consumedTime := float64(milliseconds) / 1000.0 - other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio, priceData.ModelPrice) + other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio, 0, 0.0, priceData.ModelPrice) model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, info.OriginModelName, "模型测试", quota, "模型测试", 0, quota, int(consumedTime), false, info.Group, other) common.SysLog(fmt.Sprintf("testing channel #%d, response: \n%s", channel.Id, string(respBody))) diff --git a/controller/pricing.go b/controller/pricing.go index 97f27490..1cbfe731 100644 --- a/controller/pricing.go +++ b/controller/pricing.go @@ -4,6 +4,7 @@ import ( "github.com/gin-gonic/gin" "one-api/model" "one-api/setting" + "one-api/setting/operation_setting" ) func GetPricing(c *gin.Context) { @@ -39,7 +40,7 @@ func GetPricing(c *gin.Context) { } func ResetModelRatio(c *gin.Context) { - defaultStr := setting.DefaultModelRatio2JSONString() + defaultStr := operation_setting.DefaultModelRatio2JSONString() err := model.UpdateOption("ModelRatio", defaultStr) if err != nil { c.JSON(200, gin.H{ @@ -48,7 +49,7 @@ func ResetModelRatio(c *gin.Context) { }) return } - err = setting.UpdateModelRatioByJSONString(defaultStr) + err = operation_setting.UpdateModelRatioByJSONString(defaultStr) if err != nil { c.JSON(200, gin.H{ "success": false, diff --git a/model/option.go b/model/option.go index a184c069..fe12eab1 100644 --- a/model/option.go +++ b/model/option.go @@ -92,11 +92,12 @@ func InitOptionMap() { common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount) common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes) common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount) - common.OptionMap["ModelRatio"] = setting.ModelRatio2JSONString() - common.OptionMap["ModelPrice"] = setting.ModelPrice2JSONString() + common.OptionMap["ModelRatio"] = operation_setting.ModelRatio2JSONString() + common.OptionMap["ModelPrice"] = operation_setting.ModelPrice2JSONString() + common.OptionMap["CacheRatio"] = operation_setting.CacheRatio2JSONString() common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString() common.OptionMap["UserUsableGroups"] = setting.UserUsableGroups2JSONString() - common.OptionMap["CompletionRatio"] = setting.CompletionRatio2JSONString() + common.OptionMap["CompletionRatio"] = operation_setting.CompletionRatio2JSONString() common.OptionMap["TopUpLink"] = common.TopUpLink common.OptionMap["ChatLink"] = common.ChatLink common.OptionMap["ChatLink2"] = common.ChatLink2 @@ -344,15 +345,17 @@ func updateOptionMap(key string, value string) (err error) { case "DataExportDefaultTime": common.DataExportDefaultTime = value case "ModelRatio": - err = setting.UpdateModelRatioByJSONString(value) + err = operation_setting.UpdateModelRatioByJSONString(value) case "GroupRatio": err = setting.UpdateGroupRatioByJSONString(value) case "UserUsableGroups": err = setting.UpdateUserUsableGroupsByJSONString(value) case "CompletionRatio": - err = setting.UpdateCompletionRatioByJSONString(value) + err = operation_setting.UpdateCompletionRatioByJSONString(value) case "ModelPrice": - err = setting.UpdateModelPriceByJSONString(value) + err = operation_setting.UpdateModelPriceByJSONString(value) + case "CacheRatio": + err = operation_setting.UpdateCacheRatioByJSONString(value) case "TopUpLink": common.TopUpLink = value case "ChatLink": diff --git a/model/pricing.go b/model/pricing.go index 2d0aa1b7..ba1815e2 100644 --- a/model/pricing.go +++ b/model/pricing.go @@ -2,7 +2,7 @@ package model import ( "one-api/common" - "one-api/setting" + "one-api/setting/operation_setting" "sync" "time" ) @@ -65,14 +65,14 @@ func updatePricing() { ModelName: model, EnableGroup: groups, } - modelPrice, findPrice := setting.GetModelPrice(model, false) + modelPrice, findPrice := operation_setting.GetModelPrice(model, false) if findPrice { pricing.ModelPrice = modelPrice pricing.QuotaType = 1 } else { - modelRatio, _ := setting.GetModelRatio(model) + modelRatio, _ := operation_setting.GetModelRatio(model) pricing.ModelRatio = modelRatio - pricing.CompletionRatio = setting.GetCompletionRatio(model) + pricing.CompletionRatio = operation_setting.GetCompletionRatio(model) pricing.QuotaType = 0 } pricingMap = append(pricingMap, pricing) diff --git a/relay/helper/price.go b/relay/helper/price.go index 51f64082..b169df98 100644 --- a/relay/helper/price.go +++ b/relay/helper/price.go @@ -6,30 +6,33 @@ import ( "one-api/common" relaycommon "one-api/relay/common" "one-api/setting" + "one-api/setting/operation_setting" ) type PriceData struct { ModelPrice float64 ModelRatio float64 CompletionRatio float64 + CacheRatio float64 GroupRatio float64 UsePrice bool ShouldPreConsumedQuota int } func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) (PriceData, error) { - modelPrice, usePrice := setting.GetModelPrice(info.OriginModelName, false) + modelPrice, usePrice := operation_setting.GetModelPrice(info.OriginModelName, false) groupRatio := setting.GetGroupRatio(info.Group) var preConsumedQuota int var modelRatio float64 var completionRatio float64 + var cacheRatio float64 if !usePrice { preConsumedTokens := common.PreConsumedQuota if maxTokens != 0 { preConsumedTokens = promptTokens + maxTokens } var success bool - modelRatio, success = setting.GetModelRatio(info.OriginModelName) + modelRatio, success = operation_setting.GetModelRatio(info.OriginModelName) if !success { if info.UserId == 1 { return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置,请设置或开始自用模式;Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName) @@ -37,7 +40,8 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置;Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName) } } - completionRatio = setting.GetCompletionRatio(info.OriginModelName) + completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName) + cacheRatio, _ = operation_setting.GetCacheRatio(info.OriginModelName) ratio := modelRatio * groupRatio preConsumedQuota = int(float64(preConsumedTokens) * ratio) } else { @@ -49,6 +53,7 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens CompletionRatio: completionRatio, GroupRatio: groupRatio, UsePrice: usePrice, + CacheRatio: cacheRatio, ShouldPreConsumedQuota: preConsumedQuota, }, nil } diff --git a/relay/relay-mj.go b/relay/relay-mj.go index 8baf033a..a7018456 100644 --- a/relay/relay-mj.go +++ b/relay/relay-mj.go @@ -15,6 +15,7 @@ import ( relayconstant "one-api/relay/constant" "one-api/service" "one-api/setting" + "one-api/setting/operation_setting" "strconv" "strings" "time" @@ -157,10 +158,10 @@ func RelaySwapFace(c *gin.Context) *dto.MidjourneyResponse { return service.MidjourneyErrorWrapper(constant.MjRequestError, "sour_base64_and_target_base64_is_required") } modelName := service.CoverActionToModelName(constant.MjActionSwapFace) - modelPrice, success := setting.GetModelPrice(modelName, true) + modelPrice, success := operation_setting.GetModelPrice(modelName, true) // 如果没有配置价格,则使用默认价格 if !success { - defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName] + defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName] if !ok { modelPrice = 0.1 } else { @@ -463,10 +464,10 @@ func RelayMidjourneySubmit(c *gin.Context, relayMode int) *dto.MidjourneyRespons fullRequestURL := fmt.Sprintf("%s%s", baseURL, requestURL) modelName := service.CoverActionToModelName(midjRequest.Action) - modelPrice, success := setting.GetModelPrice(modelName, true) + modelPrice, success := operation_setting.GetModelPrice(modelName, true) // 如果没有配置价格,则使用默认价格 if !success { - defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName] + defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName] if !ok { modelPrice = 0.1 } else { diff --git a/relay/relay-text.go b/relay/relay-text.go index 57b13ca7..ddf6767d 100644 --- a/relay/relay-text.go +++ b/relay/relay-text.go @@ -110,7 +110,7 @@ func TextHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) { if err != nil { return service.OpenAIErrorWrapperLocal(err, "model_price_error", http.StatusInternalServerError) } - + // pre-consume quota 预消耗配额 preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, priceData.ShouldPreConsumedQuota, relayInfo) if openaiErr != nil { @@ -304,24 +304,26 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, CompletionTokens: 0, TotalTokens: relayInfo.PromptTokens, } - extraContent += " ,(可能是请求出错)" + extraContent += "(可能是请求出错)" } useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix() promptTokens := usage.PromptTokens + cacheTokens := usage.PromptTokensDetails.CachedTokens completionTokens := usage.CompletionTokens modelName := relayInfo.OriginModelName tokenName := ctx.GetString("token_name") - completionRatio := setting.GetCompletionRatio(modelName) + completionRatio := priceData.CompletionRatio + cacheRatio := priceData.CacheRatio ratio := priceData.ModelRatio * priceData.GroupRatio modelRatio := priceData.ModelRatio groupRatio := priceData.GroupRatio modelPrice := priceData.ModelPrice - usePrice := priceData.UsePrice quota := 0 if !priceData.UsePrice { - quota = promptTokens + int(math.Round(float64(completionTokens)*completionRatio)) + quota = (promptTokens - cacheTokens) + int(math.Round(float64(cacheTokens)*cacheRatio)) + quota += int(math.Round(float64(completionTokens) * completionRatio)) quota = int(math.Round(float64(quota) * ratio)) if ratio != 0 && quota <= 0 { quota = 1 @@ -330,8 +332,9 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, quota = int(modelPrice * common.QuotaPerUnit * groupRatio) } totalTokens := promptTokens + completionTokens + var logContent string - if !usePrice { + if !priceData.UsePrice { logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio) } else { logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio) @@ -372,7 +375,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, if extraContent != "" { logContent += ", " + extraContent } - other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice) + other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice) model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel, tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other) diff --git a/relay/relay_task.go b/relay/relay_task.go index ab35d3e8..26874ba6 100644 --- a/relay/relay_task.go +++ b/relay/relay_task.go @@ -16,6 +16,7 @@ import ( relayconstant "one-api/relay/constant" "one-api/service" "one-api/setting" + "one-api/setting/operation_setting" ) /* @@ -37,9 +38,9 @@ func RelayTaskSubmit(c *gin.Context, relayMode int) (taskErr *dto.TaskError) { } modelName := service.CoverTaskActionToModelName(platform, relayInfo.Action) - modelPrice, success := setting.GetModelPrice(modelName, true) + modelPrice, success := operation_setting.GetModelPrice(modelName, true) if !success { - defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName] + defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName] if !ok { modelPrice = 0.1 } else { diff --git a/relay/websocket.go b/relay/websocket.go index b0636057..c815eb71 100644 --- a/relay/websocket.go +++ b/relay/websocket.go @@ -11,6 +11,7 @@ import ( relaycommon "one-api/relay/common" "one-api/service" "one-api/setting" + "one-api/setting/operation_setting" ) func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWithStatusCode) { @@ -39,7 +40,7 @@ func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWi } } //relayInfo.UpstreamModelName = textRequest.Model - modelPrice, getModelPriceSuccess := setting.GetModelPrice(relayInfo.UpstreamModelName, false) + modelPrice, getModelPriceSuccess := operation_setting.GetModelPrice(relayInfo.UpstreamModelName, false) groupRatio := setting.GetGroupRatio(relayInfo.Group) var preConsumedQuota int @@ -65,7 +66,7 @@ func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWi //if realtimeEvent.Session.MaxResponseOutputTokens != 0 { // preConsumedTokens = promptTokens + int(realtimeEvent.Session.MaxResponseOutputTokens) //} - modelRatio, _ = setting.GetModelRatio(relayInfo.UpstreamModelName) + modelRatio, _ = operation_setting.GetModelRatio(relayInfo.UpstreamModelName) ratio = modelRatio * groupRatio preConsumedQuota = int(float64(preConsumedTokens) * ratio) } else { diff --git a/service/log_info_generate.go b/service/log_info_generate.go index 1e32d6f1..6406cbe1 100644 --- a/service/log_info_generate.go +++ b/service/log_info_generate.go @@ -1,16 +1,20 @@ package service import ( - "github.com/gin-gonic/gin" "one-api/dto" relaycommon "one-api/relay/common" + + "github.com/gin-gonic/gin" ) -func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio, modelPrice float64) map[string]interface{} { +func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio float64, + cacheTokens int, cacheRatio float64, modelPrice float64) map[string]interface{} { other := make(map[string]interface{}) other["model_ratio"] = modelRatio other["group_ratio"] = groupRatio other["completion_ratio"] = completionRatio + other["cache_tokens"] = cacheTokens + other["cache_ratio"] = cacheRatio other["model_price"] = modelPrice other["frt"] = float64(relayInfo.FirstResponseTime.UnixMilli() - relayInfo.StartTime.UnixMilli()) if relayInfo.ReasoningEffort != "" { @@ -27,7 +31,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m } func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} { - info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice) + info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice) info["ws"] = true info["audio_input"] = usage.InputTokenDetails.AudioTokens info["audio_output"] = usage.OutputTokenDetails.AudioTokens @@ -39,7 +43,7 @@ func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us } func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} { - info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice) + info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice) info["audio"] = true info["audio_input"] = usage.PromptTokensDetails.AudioTokens info["audio_output"] = usage.CompletionTokenDetails.AudioTokens diff --git a/service/quota.go b/service/quota.go index b3412c1e..e4499ff9 100644 --- a/service/quota.go +++ b/service/quota.go @@ -12,6 +12,7 @@ import ( relaycommon "one-api/relay/common" "one-api/relay/helper" "one-api/setting" + "one-api/setting/operation_setting" "strings" "time" @@ -38,9 +39,9 @@ func calculateAudioQuota(info QuotaInfo) int { return int(info.ModelPrice * common.QuotaPerUnit * info.GroupRatio) } - completionRatio := setting.GetCompletionRatio(info.ModelName) - audioRatio := setting.GetAudioRatio(info.ModelName) - audioCompletionRatio := setting.GetAudioCompletionRatio(info.ModelName) + completionRatio := operation_setting.GetCompletionRatio(info.ModelName) + audioRatio := operation_setting.GetAudioRatio(info.ModelName) + audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName) ratio := info.GroupRatio * info.ModelRatio quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio)) @@ -75,7 +76,7 @@ func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usag audioInputTokens := usage.InputTokenDetails.AudioTokens audioOutTokens := usage.OutputTokenDetails.AudioTokens groupRatio := setting.GetGroupRatio(relayInfo.Group) - modelRatio, _ := setting.GetModelRatio(modelName) + modelRatio, _ := operation_setting.GetModelRatio(modelName) quotaInfo := QuotaInfo{ InputDetails: TokenDetails{ @@ -122,9 +123,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod audioOutTokens := usage.OutputTokenDetails.AudioTokens tokenName := ctx.GetString("token_name") - completionRatio := setting.GetCompletionRatio(modelName) - audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName) - audioCompletionRatio := setting.GetAudioCompletionRatio(modelName) + completionRatio := operation_setting.GetCompletionRatio(modelName) + audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName) + audioCompletionRatio := operation_setting.GetAudioCompletionRatio(modelName) quotaInfo := QuotaInfo{ InputDetails: TokenDetails{ @@ -184,9 +185,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, audioOutTokens := usage.CompletionTokenDetails.AudioTokens tokenName := ctx.GetString("token_name") - completionRatio := setting.GetCompletionRatio(relayInfo.OriginModelName) - audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName) - audioCompletionRatio := setting.GetAudioCompletionRatio(relayInfo.OriginModelName) + completionRatio := operation_setting.GetCompletionRatio(relayInfo.OriginModelName) + audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName) + audioCompletionRatio := operation_setting.GetAudioCompletionRatio(relayInfo.OriginModelName) modelRatio := priceData.ModelRatio groupRatio := priceData.GroupRatio diff --git a/service/token_counter.go b/service/token_counter.go index e868beb4..a6b8e86a 100644 --- a/service/token_counter.go +++ b/service/token_counter.go @@ -10,7 +10,7 @@ import ( "one-api/constant" "one-api/dto" relaycommon "one-api/relay/common" - "one-api/setting" + "one-api/setting/operation_setting" "strings" "unicode/utf8" @@ -33,7 +33,7 @@ func InitTokenEncoders() { if err != nil { common.FatalLog(fmt.Sprintf("failed to get gpt-4o token encoder: %s", err.Error())) } - for model, _ := range setting.GetDefaultModelRatioMap() { + for model, _ := range operation_setting.GetDefaultModelRatioMap() { if strings.HasPrefix(model, "gpt-3.5") { tokenEncoderMap[model] = cl100TokenEncoder } else if strings.HasPrefix(model, "gpt-4") { diff --git a/setting/operation_setting/cache_ratio.go b/setting/operation_setting/cache_ratio.go new file mode 100644 index 00000000..5943dcfa --- /dev/null +++ b/setting/operation_setting/cache_ratio.go @@ -0,0 +1,77 @@ +package operation_setting + +import ( + "encoding/json" + "one-api/common" + "sync" +) + +var defaultCacheRatio = map[string]float64{ + "gpt-4": 0.5, + "o1-2024-12-17": 0.5, + "o1-preview-2024-09-12": 0.5, + "o1-mini-2024-09-12": 0.5, + "gpt-4o-2024-11-20": 0.5, + "gpt-4o-2024-08-06": 0.5, + "gpt-4o-mini-2024-07-18": 0.5, + "gpt-4o-realtime-preview": 0.5, + "gpt-4o-mini-realtime-preview": 0.5, + "deepseek-chat": 0.5, + "deepseek-reasoner": 0.5, + "deepseek-coder": 0.5, +} + +var cacheRatioMap map[string]float64 +var cacheRatioMapMutex sync.RWMutex + +// GetCacheRatioMap returns the cache ratio map +func GetCacheRatioMap() map[string]float64 { + cacheRatioMapMutex.Lock() + defer cacheRatioMapMutex.Unlock() + if cacheRatioMap == nil { + cacheRatioMap = defaultCacheRatio + } + return cacheRatioMap +} + +// CacheRatio2JSONString converts the cache ratio map to a JSON string +func CacheRatio2JSONString() string { + GetCacheRatioMap() + jsonBytes, err := json.Marshal(cacheRatioMap) + if err != nil { + common.SysError("error marshalling cache ratio: " + err.Error()) + } + return string(jsonBytes) +} + +// UpdateCacheRatioByJSONString updates the cache ratio map from a JSON string +func UpdateCacheRatioByJSONString(jsonStr string) error { + cacheRatioMapMutex.Lock() + defer cacheRatioMapMutex.Unlock() + cacheRatioMap = make(map[string]float64) + return json.Unmarshal([]byte(jsonStr), &cacheRatioMap) +} + +// GetCacheRatio returns the cache ratio for a model +func GetCacheRatio(name string) (float64, bool) { + GetCacheRatioMap() + ratio, ok := cacheRatioMap[name] + if !ok { + return 0.5, false // Default to 0.5 if not found + } + return ratio, true +} + +// DefaultCacheRatio2JSONString converts the default cache ratio map to a JSON string +func DefaultCacheRatio2JSONString() string { + jsonBytes, err := json.Marshal(defaultCacheRatio) + if err != nil { + common.SysError("error marshalling default cache ratio: " + err.Error()) + } + return string(jsonBytes) +} + +// GetDefaultCacheRatioMap returns the default cache ratio map +func GetDefaultCacheRatioMap() map[string]float64 { + return defaultCacheRatio +} diff --git a/setting/model-ratio.go b/setting/operation_setting/model-ratio.go similarity index 99% rename from setting/model-ratio.go rename to setting/operation_setting/model-ratio.go index 54b214f9..d9312e6c 100644 --- a/setting/model-ratio.go +++ b/setting/operation_setting/model-ratio.go @@ -1,9 +1,8 @@ -package setting +package operation_setting import ( "encoding/json" "one-api/common" - "one-api/setting/operation_setting" "strings" "sync" ) @@ -326,7 +325,7 @@ func GetModelRatio(name string) (float64, bool) { } ratio, ok := modelRatioMap[name] if !ok { - return 37.5, operation_setting.SelfUseModeEnabled + return 37.5, SelfUseModeEnabled } return ratio, true } diff --git a/web/src/components/LogsTable.js b/web/src/components/LogsTable.js index 04a1be40..cf1dbaea 100644 --- a/web/src/components/LogsTable.js +++ b/web/src/components/LogsTable.js @@ -464,6 +464,8 @@ const LogsTable = () => { other.model_ratio, other.model_price, other.group_ratio, + other.cache_tokens || 0, + other.cache_ratio || 1.0, ); return ( { other?.audio_ratio, other?.audio_completion_ratio, other.group_ratio, + other.cache_tokens || 0, + other.cache_ratio || 1.0, ); } else { content = renderModelPrice( @@ -674,6 +678,8 @@ const LogsTable = () => { other.model_price, other.completion_ratio, other.group_ratio, + other.cache_tokens || 0, + other.cache_ratio || 1.0, ); } expandDataLocal.push({ diff --git a/web/src/components/OperationSetting.js b/web/src/components/OperationSetting.js index 5c51c751..85d883d5 100644 --- a/web/src/components/OperationSetting.js +++ b/web/src/components/OperationSetting.js @@ -28,6 +28,7 @@ const OperationSetting = () => { PreConsumedQuota: 0, StreamCacheQueueLength: 0, ModelRatio: '', + CacheRatio: '', CompletionRatio: '', ModelPrice: '', GroupRatio: '', @@ -77,7 +78,8 @@ const OperationSetting = () => { item.key === 'GroupRatio' || item.key === 'UserUsableGroups' || item.key === 'CompletionRatio' || - item.key === 'ModelPrice' + item.key === 'ModelPrice' || + item.key === 'CacheRatio' ) { item.value = JSON.stringify(JSON.parse(item.value), null, 2); } diff --git a/web/src/helpers/render.js b/web/src/helpers/render.js index 5342d741..c71df108 100644 --- a/web/src/helpers/render.js +++ b/web/src/helpers/render.js @@ -298,6 +298,8 @@ export function renderModelPrice( modelPrice = -1, completionRatio, groupRatio, + cacheTokens = 0, + cacheRatio = 1.0, ) { if (modelPrice !== -1) { return i18next.t('模型价格:${{price}} * 分组倍率:{{ratio}} = ${{total}}', { @@ -311,9 +313,15 @@ export function renderModelPrice( } let inputRatioPrice = modelRatio * 2.0; let completionRatioPrice = modelRatio * 2.0 * completionRatio; + let cacheRatioPrice = modelRatio * 2.0 * cacheRatio; + + // Calculate effective input tokens (non-cached + cached with ratio applied) + const effectiveInputTokens = (inputTokens - cacheTokens) + (cacheTokens * cacheRatio); + let price = - (inputTokens / 1000000) * inputRatioPrice * groupRatio + + (effectiveInputTokens / 1000000) * inputRatioPrice * groupRatio + (completionTokens / 1000000) * completionRatioPrice * groupRatio; + return ( <>
@@ -327,16 +335,36 @@ export function renderModelPrice( ratio: groupRatio, total: completionRatioPrice * groupRatio })}

+ {cacheTokens > 0 && ( +

{i18next.t('缓存:${{price}} * {{ratio}} = ${{total}} / 1M tokens (缓存比例: {{cacheRatio}})', { + price: cacheRatioPrice, + ratio: groupRatio, + total: cacheRatioPrice * groupRatio, + cacheRatio: cacheRatio + })}

+ )}

- {i18next.t('提示 {{input}} tokens / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', { - input: inputTokens, - price: inputRatioPrice, - completion: completionTokens, - compPrice: completionRatioPrice, - ratio: groupRatio, - total: price.toFixed(6) - })} + {cacheTokens > 0 ? + i18next.t('提示 {{nonCacheInput}} tokens + 缓存 {{cacheInput}} tokens * {{cacheRatio}} / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', { + nonCacheInput: inputTokens - cacheTokens, + cacheInput: cacheTokens, + cacheRatio: cacheRatio, + price: inputRatioPrice, + completion: completionTokens, + compPrice: completionRatioPrice, + ratio: groupRatio, + total: price.toFixed(6) + }) : + i18next.t('提示 {{input}} tokens / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', { + input: inputTokens, + price: inputRatioPrice, + completion: completionTokens, + compPrice: completionRatioPrice, + ratio: groupRatio, + total: price.toFixed(6) + }) + }

{i18next.t('仅供参考,以实际扣费为准')}

@@ -349,6 +377,8 @@ export function renderModelPriceSimple( modelRatio, modelPrice = -1, groupRatio, + cacheTokens = 0, + cacheRatio = 1.0, ) { if (modelPrice !== -1) { return i18next.t('价格:${{price}} * 分组:{{ratio}}', { @@ -356,10 +386,18 @@ export function renderModelPriceSimple( ratio: groupRatio }); } else { - return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}}', { - ratio: modelRatio, - groupRatio: groupRatio - }); + if (cacheTokens !== 0) { + return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}} * 缓存比例: {{cacheRatio}}', { + ratio: modelRatio, + groupRatio: groupRatio, + cacheRatio: cacheRatio + }); + } else { + return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}}', { + ratio: modelRatio, + groupRatio: groupRatio + }); + } } } @@ -374,6 +412,8 @@ export function renderAudioModelPrice( audioRatio, audioCompletionRatio, groupRatio, + cacheTokens = 0, + cacheRatio = 1.0, ) { // 1 ratio = $0.002 / 1K tokens if (modelPrice !== -1) { @@ -388,8 +428,13 @@ export function renderAudioModelPrice( // 这里的 *2 是因为 1倍率=0.002刀,请勿删除 let inputRatioPrice = modelRatio * 2.0; let completionRatioPrice = modelRatio * 2.0 * completionRatio; + let cacheRatioPrice = modelRatio * 2.0 * cacheRatio; + + // Calculate effective input tokens (non-cached + cached with ratio applied) + const effectiveInputTokens = (inputTokens - cacheTokens) + (cacheTokens * cacheRatio); + let price = - (inputTokens / 1000000) * inputRatioPrice * groupRatio + + (effectiveInputTokens / 1000000) * inputRatioPrice * groupRatio + (completionTokens / 1000000) * completionRatioPrice * groupRatio + (audioInputTokens / 1000000) * inputRatioPrice * audioRatio * groupRatio + (audioCompletionTokens / 1000000) * inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio; @@ -406,6 +451,14 @@ export function renderAudioModelPrice( ratio: groupRatio, total: completionRatioPrice * groupRatio })}

+ {cacheTokens > 0 && ( +

{i18next.t('缓存:${{price}} * {{ratio}} = ${{total}} / 1M tokens (缓存比例: {{cacheRatio}})', { + price: cacheRatioPrice, + ratio: groupRatio, + total: cacheRatioPrice * groupRatio, + cacheRatio: cacheRatio + })}

+ )}

{i18next.t('音频提示:${{price}} * {{ratio}} * {{audioRatio}} = ${{total}} / 1M tokens', { price: inputRatioPrice, ratio: groupRatio, @@ -420,12 +473,22 @@ export function renderAudioModelPrice( total: inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio })}

- {i18next.t('文字提示 {{input}} tokens / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', { - input: inputTokens, - price: inputRatioPrice, - completion: completionTokens, - compPrice: completionRatioPrice - })} + {cacheTokens > 0 ? + i18next.t('文字提示 {{nonCacheInput}} tokens + 文字缓存 {{cacheInput}} tokens * {{cacheRatio}} / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', { + nonCacheInput: inputTokens - cacheTokens, + cacheInput: cacheTokens, + cacheRatio: cacheRatio, + price: inputRatioPrice, + completion: completionTokens, + compPrice: completionRatioPrice + }) : + i18next.t('文字提示 {{input}} tokens / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', { + input: inputTokens, + price: inputRatioPrice, + completion: completionTokens, + compPrice: completionRatioPrice + }) + }

{i18next.t('音频提示 {{input}} tokens / 1M tokens * ${{price}} * {{audioRatio}} + 音频补全 {{completion}} tokens / 1M tokens * ${{price}} * {{audioRatio}} * {{audioCompRatio}}', { diff --git a/web/src/pages/Setting/Operation/ModelRatioSettings.js b/web/src/pages/Setting/Operation/ModelRatioSettings.js index 48981d6a..1bc37550 100644 --- a/web/src/pages/Setting/Operation/ModelRatioSettings.js +++ b/web/src/pages/Setting/Operation/ModelRatioSettings.js @@ -15,6 +15,7 @@ export default function ModelRatioSettings(props) { const [inputs, setInputs] = useState({ ModelPrice: '', ModelRatio: '', + CacheRatio: '', CompletionRatio: '', }); const refForm = useRef(); @@ -139,6 +140,25 @@ export default function ModelRatioSettings(props) { /> + + + verifyJSON(value), + message: '不是合法的 JSON 字符串' + } + ]} + onChange={(value) => setInputs({ ...inputs, CacheRatio: value })} + /> + +