From 4f194f4e6a1559d8eb5ecf5d0886c079a91e39cf Mon Sep 17 00:00:00 2001
From: "1808837298@qq.com" <1808837298@qq.com>
Date: Sat, 8 Mar 2025 01:30:50 +0800
Subject: [PATCH] feat: Implement cache token ratio for more precise token
pricing
---
controller/channel-test.go | 2 +-
controller/pricing.go | 5 +-
model/option.go | 15 ++-
model/pricing.go | 8 +-
relay/helper/price.go | 11 +-
relay/relay-mj.go | 9 +-
relay/relay-text.go | 17 +--
relay/relay_task.go | 5 +-
relay/websocket.go | 5 +-
service/log_info_generate.go | 12 +-
service/quota.go | 21 ++--
service/token_counter.go | 4 +-
setting/operation_setting/cache_ratio.go | 77 +++++++++++++
.../{ => operation_setting}/model-ratio.go | 5 +-
web/src/components/LogsTable.js | 6 +
web/src/components/OperationSetting.js | 4 +-
web/src/helpers/render.js | 103 ++++++++++++++----
.../Setting/Operation/ModelRatioSettings.js | 20 ++++
18 files changed, 258 insertions(+), 71 deletions(-)
create mode 100644 setting/operation_setting/cache_ratio.go
rename setting/{ => operation_setting}/model-ratio.go (99%)
diff --git a/controller/channel-test.go b/controller/channel-test.go
index 98623a76..02a30593 100644
--- a/controller/channel-test.go
+++ b/controller/channel-test.go
@@ -158,7 +158,7 @@ func testChannel(channel *model.Channel, testModel string) (err error, openAIErr
tok := time.Now()
milliseconds := tok.Sub(tik).Milliseconds()
consumedTime := float64(milliseconds) / 1000.0
- other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio, priceData.ModelPrice)
+ other := service.GenerateTextOtherInfo(c, info, priceData.ModelRatio, priceData.GroupRatio, priceData.CompletionRatio, 0, 0.0, priceData.ModelPrice)
model.RecordConsumeLog(c, 1, channel.Id, usage.PromptTokens, usage.CompletionTokens, info.OriginModelName, "模型测试",
quota, "模型测试", 0, quota, int(consumedTime), false, info.Group, other)
common.SysLog(fmt.Sprintf("testing channel #%d, response: \n%s", channel.Id, string(respBody)))
diff --git a/controller/pricing.go b/controller/pricing.go
index 97f27490..1cbfe731 100644
--- a/controller/pricing.go
+++ b/controller/pricing.go
@@ -4,6 +4,7 @@ import (
"github.com/gin-gonic/gin"
"one-api/model"
"one-api/setting"
+ "one-api/setting/operation_setting"
)
func GetPricing(c *gin.Context) {
@@ -39,7 +40,7 @@ func GetPricing(c *gin.Context) {
}
func ResetModelRatio(c *gin.Context) {
- defaultStr := setting.DefaultModelRatio2JSONString()
+ defaultStr := operation_setting.DefaultModelRatio2JSONString()
err := model.UpdateOption("ModelRatio", defaultStr)
if err != nil {
c.JSON(200, gin.H{
@@ -48,7 +49,7 @@ func ResetModelRatio(c *gin.Context) {
})
return
}
- err = setting.UpdateModelRatioByJSONString(defaultStr)
+ err = operation_setting.UpdateModelRatioByJSONString(defaultStr)
if err != nil {
c.JSON(200, gin.H{
"success": false,
diff --git a/model/option.go b/model/option.go
index a184c069..fe12eab1 100644
--- a/model/option.go
+++ b/model/option.go
@@ -92,11 +92,12 @@ func InitOptionMap() {
common.OptionMap["ModelRequestRateLimitCount"] = strconv.Itoa(setting.ModelRequestRateLimitCount)
common.OptionMap["ModelRequestRateLimitDurationMinutes"] = strconv.Itoa(setting.ModelRequestRateLimitDurationMinutes)
common.OptionMap["ModelRequestRateLimitSuccessCount"] = strconv.Itoa(setting.ModelRequestRateLimitSuccessCount)
- common.OptionMap["ModelRatio"] = setting.ModelRatio2JSONString()
- common.OptionMap["ModelPrice"] = setting.ModelPrice2JSONString()
+ common.OptionMap["ModelRatio"] = operation_setting.ModelRatio2JSONString()
+ common.OptionMap["ModelPrice"] = operation_setting.ModelPrice2JSONString()
+ common.OptionMap["CacheRatio"] = operation_setting.CacheRatio2JSONString()
common.OptionMap["GroupRatio"] = setting.GroupRatio2JSONString()
common.OptionMap["UserUsableGroups"] = setting.UserUsableGroups2JSONString()
- common.OptionMap["CompletionRatio"] = setting.CompletionRatio2JSONString()
+ common.OptionMap["CompletionRatio"] = operation_setting.CompletionRatio2JSONString()
common.OptionMap["TopUpLink"] = common.TopUpLink
common.OptionMap["ChatLink"] = common.ChatLink
common.OptionMap["ChatLink2"] = common.ChatLink2
@@ -344,15 +345,17 @@ func updateOptionMap(key string, value string) (err error) {
case "DataExportDefaultTime":
common.DataExportDefaultTime = value
case "ModelRatio":
- err = setting.UpdateModelRatioByJSONString(value)
+ err = operation_setting.UpdateModelRatioByJSONString(value)
case "GroupRatio":
err = setting.UpdateGroupRatioByJSONString(value)
case "UserUsableGroups":
err = setting.UpdateUserUsableGroupsByJSONString(value)
case "CompletionRatio":
- err = setting.UpdateCompletionRatioByJSONString(value)
+ err = operation_setting.UpdateCompletionRatioByJSONString(value)
case "ModelPrice":
- err = setting.UpdateModelPriceByJSONString(value)
+ err = operation_setting.UpdateModelPriceByJSONString(value)
+ case "CacheRatio":
+ err = operation_setting.UpdateCacheRatioByJSONString(value)
case "TopUpLink":
common.TopUpLink = value
case "ChatLink":
diff --git a/model/pricing.go b/model/pricing.go
index 2d0aa1b7..ba1815e2 100644
--- a/model/pricing.go
+++ b/model/pricing.go
@@ -2,7 +2,7 @@ package model
import (
"one-api/common"
- "one-api/setting"
+ "one-api/setting/operation_setting"
"sync"
"time"
)
@@ -65,14 +65,14 @@ func updatePricing() {
ModelName: model,
EnableGroup: groups,
}
- modelPrice, findPrice := setting.GetModelPrice(model, false)
+ modelPrice, findPrice := operation_setting.GetModelPrice(model, false)
if findPrice {
pricing.ModelPrice = modelPrice
pricing.QuotaType = 1
} else {
- modelRatio, _ := setting.GetModelRatio(model)
+ modelRatio, _ := operation_setting.GetModelRatio(model)
pricing.ModelRatio = modelRatio
- pricing.CompletionRatio = setting.GetCompletionRatio(model)
+ pricing.CompletionRatio = operation_setting.GetCompletionRatio(model)
pricing.QuotaType = 0
}
pricingMap = append(pricingMap, pricing)
diff --git a/relay/helper/price.go b/relay/helper/price.go
index 51f64082..b169df98 100644
--- a/relay/helper/price.go
+++ b/relay/helper/price.go
@@ -6,30 +6,33 @@ import (
"one-api/common"
relaycommon "one-api/relay/common"
"one-api/setting"
+ "one-api/setting/operation_setting"
)
type PriceData struct {
ModelPrice float64
ModelRatio float64
CompletionRatio float64
+ CacheRatio float64
GroupRatio float64
UsePrice bool
ShouldPreConsumedQuota int
}
func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) (PriceData, error) {
- modelPrice, usePrice := setting.GetModelPrice(info.OriginModelName, false)
+ modelPrice, usePrice := operation_setting.GetModelPrice(info.OriginModelName, false)
groupRatio := setting.GetGroupRatio(info.Group)
var preConsumedQuota int
var modelRatio float64
var completionRatio float64
+ var cacheRatio float64
if !usePrice {
preConsumedTokens := common.PreConsumedQuota
if maxTokens != 0 {
preConsumedTokens = promptTokens + maxTokens
}
var success bool
- modelRatio, success = setting.GetModelRatio(info.OriginModelName)
+ modelRatio, success = operation_setting.GetModelRatio(info.OriginModelName)
if !success {
if info.UserId == 1 {
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置,请设置或开始自用模式;Model %s ratio or price not set, please set or start self-use mode", info.OriginModelName, info.OriginModelName)
@@ -37,7 +40,8 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
return PriceData{}, fmt.Errorf("模型 %s 倍率或价格未配置, 请联系管理员设置;Model %s ratio or price not set, please contact administrator to set", info.OriginModelName, info.OriginModelName)
}
}
- completionRatio = setting.GetCompletionRatio(info.OriginModelName)
+ completionRatio = operation_setting.GetCompletionRatio(info.OriginModelName)
+ cacheRatio, _ = operation_setting.GetCacheRatio(info.OriginModelName)
ratio := modelRatio * groupRatio
preConsumedQuota = int(float64(preConsumedTokens) * ratio)
} else {
@@ -49,6 +53,7 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
CompletionRatio: completionRatio,
GroupRatio: groupRatio,
UsePrice: usePrice,
+ CacheRatio: cacheRatio,
ShouldPreConsumedQuota: preConsumedQuota,
}, nil
}
diff --git a/relay/relay-mj.go b/relay/relay-mj.go
index 8baf033a..a7018456 100644
--- a/relay/relay-mj.go
+++ b/relay/relay-mj.go
@@ -15,6 +15,7 @@ import (
relayconstant "one-api/relay/constant"
"one-api/service"
"one-api/setting"
+ "one-api/setting/operation_setting"
"strconv"
"strings"
"time"
@@ -157,10 +158,10 @@ func RelaySwapFace(c *gin.Context) *dto.MidjourneyResponse {
return service.MidjourneyErrorWrapper(constant.MjRequestError, "sour_base64_and_target_base64_is_required")
}
modelName := service.CoverActionToModelName(constant.MjActionSwapFace)
- modelPrice, success := setting.GetModelPrice(modelName, true)
+ modelPrice, success := operation_setting.GetModelPrice(modelName, true)
// 如果没有配置价格,则使用默认价格
if !success {
- defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName]
+ defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
if !ok {
modelPrice = 0.1
} else {
@@ -463,10 +464,10 @@ func RelayMidjourneySubmit(c *gin.Context, relayMode int) *dto.MidjourneyRespons
fullRequestURL := fmt.Sprintf("%s%s", baseURL, requestURL)
modelName := service.CoverActionToModelName(midjRequest.Action)
- modelPrice, success := setting.GetModelPrice(modelName, true)
+ modelPrice, success := operation_setting.GetModelPrice(modelName, true)
// 如果没有配置价格,则使用默认价格
if !success {
- defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName]
+ defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
if !ok {
modelPrice = 0.1
} else {
diff --git a/relay/relay-text.go b/relay/relay-text.go
index 57b13ca7..ddf6767d 100644
--- a/relay/relay-text.go
+++ b/relay/relay-text.go
@@ -110,7 +110,7 @@ func TextHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) {
if err != nil {
return service.OpenAIErrorWrapperLocal(err, "model_price_error", http.StatusInternalServerError)
}
-
+
// pre-consume quota 预消耗配额
preConsumedQuota, userQuota, openaiErr := preConsumeQuota(c, priceData.ShouldPreConsumedQuota, relayInfo)
if openaiErr != nil {
@@ -304,24 +304,26 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
CompletionTokens: 0,
TotalTokens: relayInfo.PromptTokens,
}
- extraContent += " ,(可能是请求出错)"
+ extraContent += "(可能是请求出错)"
}
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
promptTokens := usage.PromptTokens
+ cacheTokens := usage.PromptTokensDetails.CachedTokens
completionTokens := usage.CompletionTokens
modelName := relayInfo.OriginModelName
tokenName := ctx.GetString("token_name")
- completionRatio := setting.GetCompletionRatio(modelName)
+ completionRatio := priceData.CompletionRatio
+ cacheRatio := priceData.CacheRatio
ratio := priceData.ModelRatio * priceData.GroupRatio
modelRatio := priceData.ModelRatio
groupRatio := priceData.GroupRatio
modelPrice := priceData.ModelPrice
- usePrice := priceData.UsePrice
quota := 0
if !priceData.UsePrice {
- quota = promptTokens + int(math.Round(float64(completionTokens)*completionRatio))
+ quota = (promptTokens - cacheTokens) + int(math.Round(float64(cacheTokens)*cacheRatio))
+ quota += int(math.Round(float64(completionTokens) * completionRatio))
quota = int(math.Round(float64(quota) * ratio))
if ratio != 0 && quota <= 0 {
quota = 1
@@ -330,8 +332,9 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
quota = int(modelPrice * common.QuotaPerUnit * groupRatio)
}
totalTokens := promptTokens + completionTokens
+
var logContent string
- if !usePrice {
+ if !priceData.UsePrice {
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,分组倍率 %.2f", modelRatio, completionRatio, groupRatio)
} else {
logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio)
@@ -372,7 +375,7 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
if extraContent != "" {
logContent += ", " + extraContent
}
- other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice)
+ other := service.GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, cacheTokens, cacheRatio, modelPrice)
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, logModel,
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
diff --git a/relay/relay_task.go b/relay/relay_task.go
index ab35d3e8..26874ba6 100644
--- a/relay/relay_task.go
+++ b/relay/relay_task.go
@@ -16,6 +16,7 @@ import (
relayconstant "one-api/relay/constant"
"one-api/service"
"one-api/setting"
+ "one-api/setting/operation_setting"
)
/*
@@ -37,9 +38,9 @@ func RelayTaskSubmit(c *gin.Context, relayMode int) (taskErr *dto.TaskError) {
}
modelName := service.CoverTaskActionToModelName(platform, relayInfo.Action)
- modelPrice, success := setting.GetModelPrice(modelName, true)
+ modelPrice, success := operation_setting.GetModelPrice(modelName, true)
if !success {
- defaultPrice, ok := setting.GetDefaultModelRatioMap()[modelName]
+ defaultPrice, ok := operation_setting.GetDefaultModelRatioMap()[modelName]
if !ok {
modelPrice = 0.1
} else {
diff --git a/relay/websocket.go b/relay/websocket.go
index b0636057..c815eb71 100644
--- a/relay/websocket.go
+++ b/relay/websocket.go
@@ -11,6 +11,7 @@ import (
relaycommon "one-api/relay/common"
"one-api/service"
"one-api/setting"
+ "one-api/setting/operation_setting"
)
func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWithStatusCode) {
@@ -39,7 +40,7 @@ func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWi
}
}
//relayInfo.UpstreamModelName = textRequest.Model
- modelPrice, getModelPriceSuccess := setting.GetModelPrice(relayInfo.UpstreamModelName, false)
+ modelPrice, getModelPriceSuccess := operation_setting.GetModelPrice(relayInfo.UpstreamModelName, false)
groupRatio := setting.GetGroupRatio(relayInfo.Group)
var preConsumedQuota int
@@ -65,7 +66,7 @@ func WssHelper(c *gin.Context, ws *websocket.Conn) (openaiErr *dto.OpenAIErrorWi
//if realtimeEvent.Session.MaxResponseOutputTokens != 0 {
// preConsumedTokens = promptTokens + int(realtimeEvent.Session.MaxResponseOutputTokens)
//}
- modelRatio, _ = setting.GetModelRatio(relayInfo.UpstreamModelName)
+ modelRatio, _ = operation_setting.GetModelRatio(relayInfo.UpstreamModelName)
ratio = modelRatio * groupRatio
preConsumedQuota = int(float64(preConsumedTokens) * ratio)
} else {
diff --git a/service/log_info_generate.go b/service/log_info_generate.go
index 1e32d6f1..6406cbe1 100644
--- a/service/log_info_generate.go
+++ b/service/log_info_generate.go
@@ -1,16 +1,20 @@
package service
import (
- "github.com/gin-gonic/gin"
"one-api/dto"
relaycommon "one-api/relay/common"
+
+ "github.com/gin-gonic/gin"
)
-func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio, modelPrice float64) map[string]interface{} {
+func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelRatio, groupRatio, completionRatio float64,
+ cacheTokens int, cacheRatio float64, modelPrice float64) map[string]interface{} {
other := make(map[string]interface{})
other["model_ratio"] = modelRatio
other["group_ratio"] = groupRatio
other["completion_ratio"] = completionRatio
+ other["cache_tokens"] = cacheTokens
+ other["cache_ratio"] = cacheRatio
other["model_price"] = modelPrice
other["frt"] = float64(relayInfo.FirstResponseTime.UnixMilli() - relayInfo.StartTime.UnixMilli())
if relayInfo.ReasoningEffort != "" {
@@ -27,7 +31,7 @@ func GenerateTextOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, m
}
func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
- info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice)
+ info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice)
info["ws"] = true
info["audio_input"] = usage.InputTokenDetails.AudioTokens
info["audio_output"] = usage.OutputTokenDetails.AudioTokens
@@ -39,7 +43,7 @@ func GenerateWssOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, us
}
func GenerateAudioOtherInfo(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.Usage, modelRatio, groupRatio, completionRatio, audioRatio, audioCompletionRatio, modelPrice float64) map[string]interface{} {
- info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, modelPrice)
+ info := GenerateTextOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio, 0, 0.0, modelPrice)
info["audio"] = true
info["audio_input"] = usage.PromptTokensDetails.AudioTokens
info["audio_output"] = usage.CompletionTokenDetails.AudioTokens
diff --git a/service/quota.go b/service/quota.go
index b3412c1e..e4499ff9 100644
--- a/service/quota.go
+++ b/service/quota.go
@@ -12,6 +12,7 @@ import (
relaycommon "one-api/relay/common"
"one-api/relay/helper"
"one-api/setting"
+ "one-api/setting/operation_setting"
"strings"
"time"
@@ -38,9 +39,9 @@ func calculateAudioQuota(info QuotaInfo) int {
return int(info.ModelPrice * common.QuotaPerUnit * info.GroupRatio)
}
- completionRatio := setting.GetCompletionRatio(info.ModelName)
- audioRatio := setting.GetAudioRatio(info.ModelName)
- audioCompletionRatio := setting.GetAudioCompletionRatio(info.ModelName)
+ completionRatio := operation_setting.GetCompletionRatio(info.ModelName)
+ audioRatio := operation_setting.GetAudioRatio(info.ModelName)
+ audioCompletionRatio := operation_setting.GetAudioCompletionRatio(info.ModelName)
ratio := info.GroupRatio * info.ModelRatio
quota := info.InputDetails.TextTokens + int(math.Round(float64(info.OutputDetails.TextTokens)*completionRatio))
@@ -75,7 +76,7 @@ func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usag
audioInputTokens := usage.InputTokenDetails.AudioTokens
audioOutTokens := usage.OutputTokenDetails.AudioTokens
groupRatio := setting.GetGroupRatio(relayInfo.Group)
- modelRatio, _ := setting.GetModelRatio(modelName)
+ modelRatio, _ := operation_setting.GetModelRatio(modelName)
quotaInfo := QuotaInfo{
InputDetails: TokenDetails{
@@ -122,9 +123,9 @@ func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, mod
audioOutTokens := usage.OutputTokenDetails.AudioTokens
tokenName := ctx.GetString("token_name")
- completionRatio := setting.GetCompletionRatio(modelName)
- audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName)
- audioCompletionRatio := setting.GetAudioCompletionRatio(modelName)
+ completionRatio := operation_setting.GetCompletionRatio(modelName)
+ audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName)
+ audioCompletionRatio := operation_setting.GetAudioCompletionRatio(modelName)
quotaInfo := QuotaInfo{
InputDetails: TokenDetails{
@@ -184,9 +185,9 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
audioOutTokens := usage.CompletionTokenDetails.AudioTokens
tokenName := ctx.GetString("token_name")
- completionRatio := setting.GetCompletionRatio(relayInfo.OriginModelName)
- audioRatio := setting.GetAudioRatio(relayInfo.OriginModelName)
- audioCompletionRatio := setting.GetAudioCompletionRatio(relayInfo.OriginModelName)
+ completionRatio := operation_setting.GetCompletionRatio(relayInfo.OriginModelName)
+ audioRatio := operation_setting.GetAudioRatio(relayInfo.OriginModelName)
+ audioCompletionRatio := operation_setting.GetAudioCompletionRatio(relayInfo.OriginModelName)
modelRatio := priceData.ModelRatio
groupRatio := priceData.GroupRatio
diff --git a/service/token_counter.go b/service/token_counter.go
index e868beb4..a6b8e86a 100644
--- a/service/token_counter.go
+++ b/service/token_counter.go
@@ -10,7 +10,7 @@ import (
"one-api/constant"
"one-api/dto"
relaycommon "one-api/relay/common"
- "one-api/setting"
+ "one-api/setting/operation_setting"
"strings"
"unicode/utf8"
@@ -33,7 +33,7 @@ func InitTokenEncoders() {
if err != nil {
common.FatalLog(fmt.Sprintf("failed to get gpt-4o token encoder: %s", err.Error()))
}
- for model, _ := range setting.GetDefaultModelRatioMap() {
+ for model, _ := range operation_setting.GetDefaultModelRatioMap() {
if strings.HasPrefix(model, "gpt-3.5") {
tokenEncoderMap[model] = cl100TokenEncoder
} else if strings.HasPrefix(model, "gpt-4") {
diff --git a/setting/operation_setting/cache_ratio.go b/setting/operation_setting/cache_ratio.go
new file mode 100644
index 00000000..5943dcfa
--- /dev/null
+++ b/setting/operation_setting/cache_ratio.go
@@ -0,0 +1,107 @@
+package operation_setting
+
+import (
+	"encoding/json"
+	"one-api/common"
+	"sync"
+)
+
+// defaultCacheRatio is the built-in cache ratio table. It is used until an
+// override is loaded through UpdateCacheRatioByJSONString, and nothing in this
+// file modifies it.
+var defaultCacheRatio = map[string]float64{
+	"gpt-4":                        0.5,
+	"o1-2024-12-17":                0.5,
+	"o1-preview-2024-09-12":        0.5,
+	"o1-mini-2024-09-12":           0.5,
+	"gpt-4o-2024-11-20":            0.5,
+	"gpt-4o-2024-08-06":            0.5,
+	"gpt-4o-mini-2024-07-18":       0.5,
+	"gpt-4o-realtime-preview":      0.5,
+	"gpt-4o-mini-realtime-preview": 0.5,
+	"deepseek-chat":                0.5,
+	"deepseek-reasoner":            0.5,
+	"deepseek-coder":               0.5,
+}
+
+// cacheRatioMap is the active table; nil means it has not been initialised.
+// This file never mutates a table after storing it here: updates build a
+// fresh map and swap it in, so previously returned references stay safe to
+// read.
+var cacheRatioMap map[string]float64
+
+// cacheRatioMapMutex guards the cacheRatioMap variable.
+var cacheRatioMapMutex sync.RWMutex
+
+// GetCacheRatioMap returns the active cache ratio map, installing the built-in
+// defaults on first use. Callers should treat the returned map as read-only.
+func GetCacheRatioMap() map[string]float64 {
+	cacheRatioMapMutex.RLock()
+	current := cacheRatioMap
+	cacheRatioMapMutex.RUnlock()
+	if current != nil {
+		return current
+	}
+
+	cacheRatioMapMutex.Lock()
+	defer cacheRatioMapMutex.Unlock()
+	// Re-check under the write lock: another goroutine may have installed a map
+	// between the two lock acquisitions.
+	if cacheRatioMap == nil {
+		cacheRatioMap = defaultCacheRatio
+	}
+	return cacheRatioMap
+}
+
+// CacheRatio2JSONString serialises the active cache ratio map to JSON. A
+// marshalling error is logged and the (then empty) result is still returned.
+func CacheRatio2JSONString() string {
+	jsonBytes, err := json.Marshal(GetCacheRatioMap())
+	if err != nil {
+		common.SysError("error marshalling cache ratio: " + err.Error())
+	}
+	return string(jsonBytes)
+}
+
+// UpdateCacheRatioByJSONString replaces the active cache ratio map with the
+// one decoded from jsonStr. The input is decoded into a fresh map first and
+// only swapped in on success, so a malformed payload returns the decode error
+// and leaves the current ratios untouched. A JSON null stores a nil map, which
+// makes the next read fall back to the built-in defaults.
+func UpdateCacheRatioByJSONString(jsonStr string) error {
+	updated := make(map[string]float64)
+	if err := json.Unmarshal([]byte(jsonStr), &updated); err != nil {
+		return err
+	}
+
+	cacheRatioMapMutex.Lock()
+	defer cacheRatioMapMutex.Unlock()
+	cacheRatioMap = updated
+	return nil
+}
+
+// GetCacheRatio returns the cache ratio configured for the model name and true.
+// When the model has no entry it returns 0.5 and false.
+func GetCacheRatio(name string) (float64, bool) {
+	ratio, ok := GetCacheRatioMap()[name]
+	if !ok {
+		return 0.5, false // Default to 0.5 if not found
+	}
+	return ratio, true
+}
+
+// DefaultCacheRatio2JSONString serialises the built-in default cache ratio map
+// to JSON. A marshalling error is logged and the result is still returned.
+func DefaultCacheRatio2JSONString() string {
+	jsonBytes, err := json.Marshal(defaultCacheRatio)
+	if err != nil {
+		common.SysError("error marshalling default cache ratio: " + err.Error())
+	}
+	return string(jsonBytes)
+}
+
+// GetDefaultCacheRatioMap returns the built-in default cache ratio map itself,
+// not a copy.
+func GetDefaultCacheRatioMap() map[string]float64 {
+	return defaultCacheRatio
+}
diff --git a/setting/model-ratio.go b/setting/operation_setting/model-ratio.go
similarity index 99%
rename from setting/model-ratio.go
rename to setting/operation_setting/model-ratio.go
index 54b214f9..d9312e6c 100644
--- a/setting/model-ratio.go
+++ b/setting/operation_setting/model-ratio.go
@@ -1,9 +1,8 @@
-package setting
+package operation_setting
import (
"encoding/json"
"one-api/common"
- "one-api/setting/operation_setting"
"strings"
"sync"
)
@@ -326,7 +325,7 @@ func GetModelRatio(name string) (float64, bool) {
}
ratio, ok := modelRatioMap[name]
if !ok {
- return 37.5, operation_setting.SelfUseModeEnabled
+ return 37.5, SelfUseModeEnabled
}
return ratio, true
}
diff --git a/web/src/components/LogsTable.js b/web/src/components/LogsTable.js
index 04a1be40..cf1dbaea 100644
--- a/web/src/components/LogsTable.js
+++ b/web/src/components/LogsTable.js
@@ -464,6 +464,8 @@ const LogsTable = () => {
other.model_ratio,
other.model_price,
other.group_ratio,
+ other.cache_tokens || 0,
+ other.cache_ratio || 1.0,
);
return (
{i18next.t('缓存:${{price}} * {{ratio}} = ${{total}} / 1M tokens (缓存比例: {{cacheRatio}})', { + price: cacheRatioPrice, + ratio: groupRatio, + total: cacheRatioPrice * groupRatio, + cacheRatio: cacheRatio + })}
+ )}- {i18next.t('提示 {{input}} tokens / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', { - input: inputTokens, - price: inputRatioPrice, - completion: completionTokens, - compPrice: completionRatioPrice, - ratio: groupRatio, - total: price.toFixed(6) - })} + {cacheTokens > 0 ? + i18next.t('提示 {{nonCacheInput}} tokens + 缓存 {{cacheInput}} tokens * {{cacheRatio}} / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', { + nonCacheInput: inputTokens - cacheTokens, + cacheInput: cacheTokens, + cacheRatio: cacheRatio, + price: inputRatioPrice, + completion: completionTokens, + compPrice: completionRatioPrice, + ratio: groupRatio, + total: price.toFixed(6) + }) : + i18next.t('提示 {{input}} tokens / 1M tokens * ${{price}} + 补全 {{completion}} tokens / 1M tokens * ${{compPrice}} * 分组 {{ratio}} = ${{total}}', { + input: inputTokens, + price: inputRatioPrice, + completion: completionTokens, + compPrice: completionRatioPrice, + ratio: groupRatio, + total: price.toFixed(6) + }) + }
{i18next.t('仅供参考,以实际扣费为准')}
@@ -349,6 +377,8 @@ export function renderModelPriceSimple( modelRatio, modelPrice = -1, groupRatio, + cacheTokens = 0, + cacheRatio = 1.0, ) { if (modelPrice !== -1) { return i18next.t('价格:${{price}} * 分组:{{ratio}}', { @@ -356,10 +386,18 @@ export function renderModelPriceSimple( ratio: groupRatio }); } else { - return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}}', { - ratio: modelRatio, - groupRatio: groupRatio - }); + if (cacheTokens !== 0) { + return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}} * 缓存比例: {{cacheRatio}}', { + ratio: modelRatio, + groupRatio: groupRatio, + cacheRatio: cacheRatio + }); + } else { + return i18next.t('模型: {{ratio}} * 分组: {{groupRatio}}', { + ratio: modelRatio, + groupRatio: groupRatio + }); + } } } @@ -374,6 +412,8 @@ export function renderAudioModelPrice( audioRatio, audioCompletionRatio, groupRatio, + cacheTokens = 0, + cacheRatio = 1.0, ) { // 1 ratio = $0.002 / 1K tokens if (modelPrice !== -1) { @@ -388,8 +428,13 @@ export function renderAudioModelPrice( // 这里的 *2 是因为 1倍率=0.002刀,请勿删除 let inputRatioPrice = modelRatio * 2.0; let completionRatioPrice = modelRatio * 2.0 * completionRatio; + let cacheRatioPrice = modelRatio * 2.0 * cacheRatio; + + // Calculate effective input tokens (non-cached + cached with ratio applied) + const effectiveInputTokens = (inputTokens - cacheTokens) + (cacheTokens * cacheRatio); + let price = - (inputTokens / 1000000) * inputRatioPrice * groupRatio + + (effectiveInputTokens / 1000000) * inputRatioPrice * groupRatio + (completionTokens / 1000000) * completionRatioPrice * groupRatio + (audioInputTokens / 1000000) * inputRatioPrice * audioRatio * groupRatio + (audioCompletionTokens / 1000000) * inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio; @@ -406,6 +451,14 @@ export function renderAudioModelPrice( ratio: groupRatio, total: completionRatioPrice * groupRatio })} + {cacheTokens > 0 && ( +{i18next.t('缓存:${{price}} * {{ratio}} = ${{total}} / 1M tokens (缓存比例: {{cacheRatio}})', { + price: 
cacheRatioPrice, + ratio: groupRatio, + total: cacheRatioPrice * groupRatio, + cacheRatio: cacheRatio + })}
+ )}{i18next.t('音频提示:${{price}} * {{ratio}} * {{audioRatio}} = ${{total}} / 1M tokens', { price: inputRatioPrice, ratio: groupRatio, @@ -420,12 +473,22 @@ export function renderAudioModelPrice( total: inputRatioPrice * audioRatio * audioCompletionRatio * groupRatio })}
- {i18next.t('文字提示 {{input}} tokens / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', { - input: inputTokens, - price: inputRatioPrice, - completion: completionTokens, - compPrice: completionRatioPrice - })} + {cacheTokens > 0 ? + i18next.t('文字提示 {{nonCacheInput}} tokens + 文字缓存 {{cacheInput}} tokens * {{cacheRatio}} / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', { + nonCacheInput: inputTokens - cacheTokens, + cacheInput: cacheTokens, + cacheRatio: cacheRatio, + price: inputRatioPrice, + completion: completionTokens, + compPrice: completionRatioPrice + }) : + i18next.t('文字提示 {{input}} tokens / 1M tokens * ${{price}} + 文字补全 {{completion}} tokens / 1M tokens * ${{compPrice}} +', { + input: inputTokens, + price: inputRatioPrice, + completion: completionTokens, + compPrice: completionRatioPrice + }) + }
{i18next.t('音频提示 {{input}} tokens / 1M tokens * ${{price}} * {{audioRatio}} + 音频补全 {{completion}} tokens / 1M tokens * ${{price}} * {{audioRatio}} * {{audioCompRatio}}', {
diff --git a/web/src/pages/Setting/Operation/ModelRatioSettings.js b/web/src/pages/Setting/Operation/ModelRatioSettings.js
index 48981d6a..1bc37550 100644
--- a/web/src/pages/Setting/Operation/ModelRatioSettings.js
+++ b/web/src/pages/Setting/Operation/ModelRatioSettings.js
@@ -15,6 +15,7 @@ export default function ModelRatioSettings(props) {
const [inputs, setInputs] = useState({
ModelPrice: '',
ModelRatio: '',
+ CacheRatio: '',
CompletionRatio: '',
});
const refForm = useRef();
@@ -139,6 +140,25 @@ export default function ModelRatioSettings(props) {
/>
+