Summary

• Migrated all ratio-related sources into `setting/ratio_setting/`:
  – `model_ratio.go` (renamed from `model-ratio.go`)
  – `cache_ratio.go`
  – `group_ratio.go`
• Changed the package name to `ratio_setting` and relocated initialization (`ratio_setting.InitRatioSettings()` in main).
• Updated every import and call site:
  – Model / cache / completion / image ratio helpers
  – Group ratio helpers (`GetGroupRatio*`, `ContainsGroupRatio`, `CheckGroupRatio`, etc.)
  – JSON-serialization and update helpers (`*Ratio2JSONString`, `Update*RatioByJSONString`)
• Adjusted controllers, middleware, relay helpers, services and models to reference the new package.
• Removed obsolete `setting` / `operation_setting` imports; added missing `ratio_setting` imports.
• Adopted idiomatic map iteration (`for key := range m`) where the value is unused.
• Ran static checks to ensure a clean build.

This commit centralises all ratio configuration (model, cache and group) in one cohesive module, simplifying future maintenance and improving code clarity.
438 lines
16 KiB
Go
438 lines
16 KiB
Go
package service
|
||
|
||
import (
|
||
"errors"
|
||
"fmt"
|
||
"log"
|
||
"one-api/common"
|
||
constant2 "one-api/constant"
|
||
"one-api/dto"
|
||
"one-api/model"
|
||
relaycommon "one-api/relay/common"
|
||
"one-api/relay/helper"
|
||
"one-api/setting"
|
||
"one-api/setting/ratio_setting"
|
||
"strings"
|
||
"time"
|
||
|
||
"github.com/bytedance/gopkg/util/gopool"
|
||
|
||
"github.com/gin-gonic/gin"
|
||
"github.com/shopspring/decimal"
|
||
)
|
||
|
||
// TokenDetails holds the token counts for one direction of a request
// (input or output), split into text and audio tokens.
type TokenDetails struct {
	// TextTokens is the number of text tokens.
	TextTokens int
	// AudioTokens is the number of audio tokens.
	AudioTokens int
}
|
||
|
||
type QuotaInfo struct {
|
||
InputDetails TokenDetails
|
||
OutputDetails TokenDetails
|
||
ModelName string
|
||
UsePrice bool
|
||
ModelPrice float64
|
||
ModelRatio float64
|
||
GroupRatio float64
|
||
}
|
||
|
||
func calculateAudioQuota(info QuotaInfo) int {
|
||
if info.UsePrice {
|
||
modelPrice := decimal.NewFromFloat(info.ModelPrice)
|
||
quotaPerUnit := decimal.NewFromFloat(common.QuotaPerUnit)
|
||
groupRatio := decimal.NewFromFloat(info.GroupRatio)
|
||
|
||
quota := modelPrice.Mul(quotaPerUnit).Mul(groupRatio)
|
||
return int(quota.IntPart())
|
||
}
|
||
|
||
completionRatio := decimal.NewFromFloat(ratio_setting.GetCompletionRatio(info.ModelName))
|
||
audioRatio := decimal.NewFromFloat(ratio_setting.GetAudioRatio(info.ModelName))
|
||
audioCompletionRatio := decimal.NewFromFloat(ratio_setting.GetAudioCompletionRatio(info.ModelName))
|
||
|
||
groupRatio := decimal.NewFromFloat(info.GroupRatio)
|
||
modelRatio := decimal.NewFromFloat(info.ModelRatio)
|
||
ratio := groupRatio.Mul(modelRatio)
|
||
|
||
inputTextTokens := decimal.NewFromInt(int64(info.InputDetails.TextTokens))
|
||
outputTextTokens := decimal.NewFromInt(int64(info.OutputDetails.TextTokens))
|
||
inputAudioTokens := decimal.NewFromInt(int64(info.InputDetails.AudioTokens))
|
||
outputAudioTokens := decimal.NewFromInt(int64(info.OutputDetails.AudioTokens))
|
||
|
||
quota := decimal.Zero
|
||
quota = quota.Add(inputTextTokens)
|
||
quota = quota.Add(outputTextTokens.Mul(completionRatio))
|
||
quota = quota.Add(inputAudioTokens.Mul(audioRatio))
|
||
quota = quota.Add(outputAudioTokens.Mul(audioRatio).Mul(audioCompletionRatio))
|
||
|
||
quota = quota.Mul(ratio)
|
||
|
||
// If ratio is not zero and quota is less than or equal to zero, set quota to 1
|
||
if !ratio.IsZero() && quota.LessThanOrEqual(decimal.Zero) {
|
||
quota = decimal.NewFromInt(1)
|
||
}
|
||
|
||
return int(quota.Round(0).IntPart())
|
||
}
|
||
|
||
func PreWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, usage *dto.RealtimeUsage) error {
|
||
if relayInfo.UsePrice {
|
||
return nil
|
||
}
|
||
userQuota, err := model.GetUserQuota(relayInfo.UserId, false)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
token, err := model.GetTokenByKey(strings.TrimLeft(relayInfo.TokenKey, "sk-"), false)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
modelName := relayInfo.OriginModelName
|
||
textInputTokens := usage.InputTokenDetails.TextTokens
|
||
textOutTokens := usage.OutputTokenDetails.TextTokens
|
||
audioInputTokens := usage.InputTokenDetails.AudioTokens
|
||
audioOutTokens := usage.OutputTokenDetails.AudioTokens
|
||
groupRatio := ratio_setting.GetGroupRatio(relayInfo.Group)
|
||
modelRatio, _ := ratio_setting.GetModelRatio(modelName)
|
||
|
||
autoGroup, exists := ctx.Get("auto_group")
|
||
if exists {
|
||
groupRatio = ratio_setting.GetGroupRatio(autoGroup.(string))
|
||
log.Printf("final group ratio: %f", groupRatio)
|
||
relayInfo.Group = autoGroup.(string)
|
||
}
|
||
|
||
actualGroupRatio := groupRatio
|
||
userGroupRatio, ok := ratio_setting.GetGroupGroupRatio(relayInfo.UserGroup, relayInfo.Group)
|
||
if ok {
|
||
actualGroupRatio = userGroupRatio
|
||
}
|
||
|
||
quotaInfo := QuotaInfo{
|
||
InputDetails: TokenDetails{
|
||
TextTokens: textInputTokens,
|
||
AudioTokens: audioInputTokens,
|
||
},
|
||
OutputDetails: TokenDetails{
|
||
TextTokens: textOutTokens,
|
||
AudioTokens: audioOutTokens,
|
||
},
|
||
ModelName: modelName,
|
||
UsePrice: relayInfo.UsePrice,
|
||
ModelRatio: modelRatio,
|
||
GroupRatio: actualGroupRatio,
|
||
}
|
||
|
||
quota := calculateAudioQuota(quotaInfo)
|
||
|
||
if userQuota < quota {
|
||
return fmt.Errorf("user quota is not enough, user quota: %s, need quota: %s", common.FormatQuota(userQuota), common.FormatQuota(quota))
|
||
}
|
||
|
||
if !token.UnlimitedQuota && token.RemainQuota < quota {
|
||
return fmt.Errorf("token quota is not enough, token remain quota: %s, need quota: %s", common.FormatQuota(token.RemainQuota), common.FormatQuota(quota))
|
||
}
|
||
|
||
err = PostConsumeQuota(relayInfo, quota, 0, false)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
common.LogInfo(ctx, "realtime streaming consume quota success, quota: "+fmt.Sprintf("%d", quota))
|
||
return nil
|
||
}
|
||
|
||
func PostWssConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo, modelName string,
|
||
usage *dto.RealtimeUsage, preConsumedQuota int, userQuota int, priceData helper.PriceData, extraContent string) {
|
||
|
||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
||
textInputTokens := usage.InputTokenDetails.TextTokens
|
||
textOutTokens := usage.OutputTokenDetails.TextTokens
|
||
|
||
audioInputTokens := usage.InputTokenDetails.AudioTokens
|
||
audioOutTokens := usage.OutputTokenDetails.AudioTokens
|
||
|
||
tokenName := ctx.GetString("token_name")
|
||
completionRatio := decimal.NewFromFloat(ratio_setting.GetCompletionRatio(modelName))
|
||
audioRatio := decimal.NewFromFloat(ratio_setting.GetAudioRatio(relayInfo.OriginModelName))
|
||
audioCompletionRatio := decimal.NewFromFloat(ratio_setting.GetAudioCompletionRatio(modelName))
|
||
|
||
modelRatio := priceData.ModelRatio
|
||
groupRatio := priceData.GroupRatioInfo.GroupRatio
|
||
modelPrice := priceData.ModelPrice
|
||
usePrice := priceData.UsePrice
|
||
|
||
quotaInfo := QuotaInfo{
|
||
InputDetails: TokenDetails{
|
||
TextTokens: textInputTokens,
|
||
AudioTokens: audioInputTokens,
|
||
},
|
||
OutputDetails: TokenDetails{
|
||
TextTokens: textOutTokens,
|
||
AudioTokens: audioOutTokens,
|
||
},
|
||
ModelName: modelName,
|
||
UsePrice: usePrice,
|
||
ModelRatio: modelRatio,
|
||
GroupRatio: groupRatio,
|
||
}
|
||
|
||
quota := calculateAudioQuota(quotaInfo)
|
||
|
||
totalTokens := usage.TotalTokens
|
||
var logContent string
|
||
if !usePrice {
|
||
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,音频倍率 %.2f,音频补全倍率 %.2f,分组倍率 %.2f",
|
||
modelRatio, completionRatio.InexactFloat64(), audioRatio.InexactFloat64(), audioCompletionRatio.InexactFloat64(), groupRatio)
|
||
} else {
|
||
logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio)
|
||
}
|
||
|
||
// record all the consume log even if quota is 0
|
||
if totalTokens == 0 {
|
||
// in this case, must be some error happened
|
||
// we cannot just return, because we may have to return the pre-consumed quota
|
||
quota = 0
|
||
logContent += fmt.Sprintf("(可能是上游超时)")
|
||
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, preConsumedQuota))
|
||
} else {
|
||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
||
}
|
||
|
||
logModel := modelName
|
||
if extraContent != "" {
|
||
logContent += ", " + extraContent
|
||
}
|
||
other := GenerateWssOtherInfo(ctx, relayInfo, usage, modelRatio, groupRatio,
|
||
completionRatio.InexactFloat64(), audioRatio.InexactFloat64(), audioCompletionRatio.InexactFloat64(), modelPrice, priceData.GroupRatioInfo.GroupSpecialRatio)
|
||
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, usage.InputTokens, usage.OutputTokens, logModel,
|
||
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
|
||
}
|
||
|
||
func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
||
usage *dto.Usage, preConsumedQuota int, userQuota int, priceData helper.PriceData, extraContent string) {
|
||
|
||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
||
promptTokens := usage.PromptTokens
|
||
completionTokens := usage.CompletionTokens
|
||
modelName := relayInfo.OriginModelName
|
||
|
||
tokenName := ctx.GetString("token_name")
|
||
completionRatio := priceData.CompletionRatio
|
||
modelRatio := priceData.ModelRatio
|
||
groupRatio := priceData.GroupRatioInfo.GroupRatio
|
||
modelPrice := priceData.ModelPrice
|
||
cacheRatio := priceData.CacheRatio
|
||
cacheTokens := usage.PromptTokensDetails.CachedTokens
|
||
|
||
cacheCreationRatio := priceData.CacheCreationRatio
|
||
cacheCreationTokens := usage.PromptTokensDetails.CachedCreationTokens
|
||
|
||
calculateQuota := 0.0
|
||
if !priceData.UsePrice {
|
||
calculateQuota = float64(promptTokens)
|
||
calculateQuota += float64(cacheTokens) * cacheRatio
|
||
calculateQuota += float64(cacheCreationTokens) * cacheCreationRatio
|
||
calculateQuota += float64(completionTokens) * completionRatio
|
||
calculateQuota = calculateQuota * groupRatio * modelRatio
|
||
} else {
|
||
calculateQuota = modelPrice * common.QuotaPerUnit * groupRatio
|
||
}
|
||
|
||
if modelRatio != 0 && calculateQuota <= 0 {
|
||
calculateQuota = 1
|
||
}
|
||
|
||
quota := int(calculateQuota)
|
||
|
||
totalTokens := promptTokens + completionTokens
|
||
|
||
var logContent string
|
||
// record all the consume log even if quota is 0
|
||
if totalTokens == 0 {
|
||
// in this case, must be some error happened
|
||
// we cannot just return, because we may have to return the pre-consumed quota
|
||
quota = 0
|
||
logContent += fmt.Sprintf("(可能是上游出错)")
|
||
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, preConsumedQuota))
|
||
} else {
|
||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
||
}
|
||
|
||
quotaDelta := quota - preConsumedQuota
|
||
if quotaDelta != 0 {
|
||
err := PostConsumeQuota(relayInfo, quotaDelta, preConsumedQuota, true)
|
||
if err != nil {
|
||
common.LogError(ctx, "error consuming token remain quota: "+err.Error())
|
||
}
|
||
}
|
||
|
||
other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio,
|
||
cacheTokens, cacheRatio, cacheCreationTokens, cacheCreationRatio, modelPrice, priceData.GroupRatioInfo.GroupSpecialRatio)
|
||
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, modelName,
|
||
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
|
||
}
|
||
|
||
func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
||
usage *dto.Usage, preConsumedQuota int, userQuota int, priceData helper.PriceData, extraContent string) {
|
||
|
||
useTimeSeconds := time.Now().Unix() - relayInfo.StartTime.Unix()
|
||
textInputTokens := usage.PromptTokensDetails.TextTokens
|
||
textOutTokens := usage.CompletionTokenDetails.TextTokens
|
||
|
||
audioInputTokens := usage.PromptTokensDetails.AudioTokens
|
||
audioOutTokens := usage.CompletionTokenDetails.AudioTokens
|
||
|
||
tokenName := ctx.GetString("token_name")
|
||
completionRatio := decimal.NewFromFloat(ratio_setting.GetCompletionRatio(relayInfo.OriginModelName))
|
||
audioRatio := decimal.NewFromFloat(ratio_setting.GetAudioRatio(relayInfo.OriginModelName))
|
||
audioCompletionRatio := decimal.NewFromFloat(ratio_setting.GetAudioCompletionRatio(relayInfo.OriginModelName))
|
||
|
||
modelRatio := priceData.ModelRatio
|
||
groupRatio := priceData.GroupRatioInfo.GroupRatio
|
||
modelPrice := priceData.ModelPrice
|
||
usePrice := priceData.UsePrice
|
||
|
||
quotaInfo := QuotaInfo{
|
||
InputDetails: TokenDetails{
|
||
TextTokens: textInputTokens,
|
||
AudioTokens: audioInputTokens,
|
||
},
|
||
OutputDetails: TokenDetails{
|
||
TextTokens: textOutTokens,
|
||
AudioTokens: audioOutTokens,
|
||
},
|
||
ModelName: relayInfo.OriginModelName,
|
||
UsePrice: usePrice,
|
||
ModelRatio: modelRatio,
|
||
GroupRatio: groupRatio,
|
||
}
|
||
|
||
quota := calculateAudioQuota(quotaInfo)
|
||
|
||
totalTokens := usage.TotalTokens
|
||
var logContent string
|
||
if !usePrice {
|
||
logContent = fmt.Sprintf("模型倍率 %.2f,补全倍率 %.2f,音频倍率 %.2f,音频补全倍率 %.2f,分组倍率 %.2f",
|
||
modelRatio, completionRatio.InexactFloat64(), audioRatio.InexactFloat64(), audioCompletionRatio.InexactFloat64(), groupRatio)
|
||
} else {
|
||
logContent = fmt.Sprintf("模型价格 %.2f,分组倍率 %.2f", modelPrice, groupRatio)
|
||
}
|
||
|
||
// record all the consume log even if quota is 0
|
||
if totalTokens == 0 {
|
||
// in this case, must be some error happened
|
||
// we cannot just return, because we may have to return the pre-consumed quota
|
||
quota = 0
|
||
logContent += fmt.Sprintf("(可能是上游超时)")
|
||
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, preConsumedQuota))
|
||
} else {
|
||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
||
}
|
||
|
||
quotaDelta := quota - preConsumedQuota
|
||
if quotaDelta != 0 {
|
||
err := PostConsumeQuota(relayInfo, quotaDelta, preConsumedQuota, true)
|
||
if err != nil {
|
||
common.LogError(ctx, "error consuming token remain quota: "+err.Error())
|
||
}
|
||
}
|
||
|
||
logModel := relayInfo.OriginModelName
|
||
if extraContent != "" {
|
||
logContent += ", " + extraContent
|
||
}
|
||
other := GenerateAudioOtherInfo(ctx, relayInfo, usage, modelRatio, groupRatio,
|
||
completionRatio.InexactFloat64(), audioRatio.InexactFloat64(), audioCompletionRatio.InexactFloat64(), modelPrice, priceData.GroupRatioInfo.GroupSpecialRatio)
|
||
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, usage.PromptTokens, usage.CompletionTokens, logModel,
|
||
tokenName, quota, logContent, relayInfo.TokenId, userQuota, int(useTimeSeconds), relayInfo.IsStream, relayInfo.Group, other)
|
||
}
|
||
|
||
func PreConsumeTokenQuota(relayInfo *relaycommon.RelayInfo, quota int) error {
|
||
if quota < 0 {
|
||
return errors.New("quota 不能为负数!")
|
||
}
|
||
if relayInfo.IsPlayground {
|
||
return nil
|
||
}
|
||
//if relayInfo.TokenUnlimited {
|
||
// return nil
|
||
//}
|
||
token, err := model.GetTokenByKey(relayInfo.TokenKey, false)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
if !relayInfo.TokenUnlimited && token.RemainQuota < quota {
|
||
return fmt.Errorf("token quota is not enough, token remain quota: %s, need quota: %s", common.FormatQuota(token.RemainQuota), common.FormatQuota(quota))
|
||
}
|
||
err = model.DecreaseTokenQuota(relayInfo.TokenId, relayInfo.TokenKey, quota)
|
||
if err != nil {
|
||
return err
|
||
}
|
||
return nil
|
||
}
|
||
|
||
func PostConsumeQuota(relayInfo *relaycommon.RelayInfo, quota int, preConsumedQuota int, sendEmail bool) (err error) {
|
||
|
||
if quota > 0 {
|
||
err = model.DecreaseUserQuota(relayInfo.UserId, quota)
|
||
} else {
|
||
err = model.IncreaseUserQuota(relayInfo.UserId, -quota, false)
|
||
}
|
||
if err != nil {
|
||
return err
|
||
}
|
||
|
||
if !relayInfo.IsPlayground {
|
||
if quota > 0 {
|
||
err = model.DecreaseTokenQuota(relayInfo.TokenId, relayInfo.TokenKey, quota)
|
||
} else {
|
||
err = model.IncreaseTokenQuota(relayInfo.TokenId, relayInfo.TokenKey, -quota)
|
||
}
|
||
if err != nil {
|
||
return err
|
||
}
|
||
}
|
||
|
||
if sendEmail {
|
||
if (quota + preConsumedQuota) != 0 {
|
||
checkAndSendQuotaNotify(relayInfo, quota, preConsumedQuota)
|
||
}
|
||
}
|
||
|
||
return nil
|
||
}
|
||
|
||
func checkAndSendQuotaNotify(relayInfo *relaycommon.RelayInfo, quota int, preConsumedQuota int) {
|
||
gopool.Go(func() {
|
||
userSetting := relayInfo.UserSetting
|
||
threshold := common.QuotaRemindThreshold
|
||
if userCustomThreshold, ok := userSetting[constant2.UserSettingQuotaWarningThreshold]; ok {
|
||
threshold = int(userCustomThreshold.(float64))
|
||
}
|
||
|
||
//noMoreQuota := userCache.Quota-(quota+preConsumedQuota) <= 0
|
||
quotaTooLow := false
|
||
consumeQuota := quota + preConsumedQuota
|
||
if relayInfo.UserQuota-consumeQuota < threshold {
|
||
quotaTooLow = true
|
||
}
|
||
if quotaTooLow {
|
||
prompt := "您的额度即将用尽"
|
||
topUpLink := fmt.Sprintf("%s/topup", setting.ServerAddress)
|
||
content := "{{value}},当前剩余额度为 {{value}},为了不影响您的使用,请及时充值。<br/>充值链接:<a href='{{value}}'>{{value}}</a>"
|
||
err := NotifyUser(relayInfo.UserId, relayInfo.UserEmail, relayInfo.UserSetting, dto.NewNotify(dto.NotifyTypeQuotaExceed, prompt, content, []interface{}{prompt, common.FormatQuota(relayInfo.UserQuota), topUpLink, topUpLink}))
|
||
if err != nil {
|
||
common.SysError(fmt.Sprintf("failed to send quota notify to user %d: %s", relayInfo.UserId, err.Error()))
|
||
}
|
||
}
|
||
})
|
||
}
|