refactor: Improve token quota consumption logic
This commit is contained in:
@@ -20,6 +20,10 @@ type PriceData struct {
|
|||||||
ShouldPreConsumedQuota int
|
ShouldPreConsumedQuota int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ToSetting formats the pricing fields of p into a single human-readable
// line of comma-separated "Name: value" pairs. It reports ModelPrice,
// ModelRatio, CompletionRatio, CacheRatio, GroupRatio, UsePrice,
// CacheCreationRatio and ShouldPreConsumedQuota, in that order. The ratio
// and price fields are printed with %f, UsePrice with %t, and
// ShouldPreConsumedQuota with %d.
func (p PriceData) ToSetting() string {
|
||||||
|
return fmt.Sprintf("ModelPrice: %f, ModelRatio: %f, CompletionRatio: %f, CacheRatio: %f, GroupRatio: %f, UsePrice: %t, CacheCreationRatio: %f, ShouldPreConsumedQuota: %d", p.ModelPrice, p.ModelRatio, p.CompletionRatio, p.CacheRatio, p.GroupRatio, p.UsePrice, p.CacheCreationRatio, p.ShouldPreConsumedQuota)
|
||||||
|
}
|
||||||
|
|
||||||
func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) (PriceData, error) {
|
func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens int, maxTokens int) (PriceData, error) {
|
||||||
modelPrice, usePrice := operation_setting.GetModelPrice(info.OriginModelName, false)
|
modelPrice, usePrice := operation_setting.GetModelPrice(info.OriginModelName, false)
|
||||||
groupRatio := setting.GetGroupRatio(info.Group)
|
groupRatio := setting.GetGroupRatio(info.Group)
|
||||||
@@ -50,7 +54,8 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
|
|||||||
} else {
|
} else {
|
||||||
preConsumedQuota = int(modelPrice * common.QuotaPerUnit * groupRatio)
|
preConsumedQuota = int(modelPrice * common.QuotaPerUnit * groupRatio)
|
||||||
}
|
}
|
||||||
return PriceData{
|
|
||||||
|
priceData := PriceData{
|
||||||
ModelPrice: modelPrice,
|
ModelPrice: modelPrice,
|
||||||
ModelRatio: modelRatio,
|
ModelRatio: modelRatio,
|
||||||
CompletionRatio: completionRatio,
|
CompletionRatio: completionRatio,
|
||||||
@@ -59,5 +64,11 @@ func ModelPriceHelper(c *gin.Context, info *relaycommon.RelayInfo, promptTokens
|
|||||||
CacheRatio: cacheRatio,
|
CacheRatio: cacheRatio,
|
||||||
CacheCreationRatio: cacheCreationRatio,
|
CacheCreationRatio: cacheCreationRatio,
|
||||||
ShouldPreConsumedQuota: preConsumedQuota,
|
ShouldPreConsumedQuota: preConsumedQuota,
|
||||||
}, nil
|
}
|
||||||
|
|
||||||
|
if common.DebugEnabled {
|
||||||
|
println(fmt.Sprintf("model_price_helper result: %s", priceData.ToSetting()))
|
||||||
|
}
|
||||||
|
|
||||||
|
return priceData, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ func TextHelper(c *gin.Context) (openaiErr *dto.OpenAIErrorWithStatusCode) {
|
|||||||
c.Set("prompt_tokens", promptTokens)
|
c.Set("prompt_tokens", promptTokens)
|
||||||
}
|
}
|
||||||
|
|
||||||
priceData, err := helper.ModelPriceHelper(c, relayInfo, promptTokens, int(textRequest.MaxTokens))
|
priceData, err := helper.ModelPriceHelper(c, relayInfo, promptTokens, int(math.Max(float64(textRequest.MaxTokens), float64(textRequest.MaxCompletionTokens))))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return service.OpenAIErrorWrapperLocal(err, "model_price_error", http.StatusInternalServerError)
|
return service.OpenAIErrorWrapperLocal(err, "model_price_error", http.StatusInternalServerError)
|
||||||
}
|
}
|
||||||
@@ -372,17 +372,18 @@ func postConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
|||||||
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
||||||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, preConsumedQuota))
|
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, preConsumedQuota))
|
||||||
} else {
|
} else {
|
||||||
quotaDelta := quota - preConsumedQuota
|
|
||||||
if quotaDelta != 0 {
|
|
||||||
err := service.PostConsumeQuota(relayInfo, quotaDelta, preConsumedQuota, true)
|
|
||||||
if err != nil {
|
|
||||||
common.LogError(ctx, "error consuming token remain quota: "+err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
||||||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
quotaDelta := quota - preConsumedQuota
|
||||||
|
if quotaDelta != 0 {
|
||||||
|
err := service.PostConsumeQuota(relayInfo, quotaDelta, preConsumedQuota, true)
|
||||||
|
if err != nil {
|
||||||
|
common.LogError(ctx, "error consuming token remain quota: "+err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
logModel := modelName
|
logModel := modelName
|
||||||
if strings.HasPrefix(logModel, "gpt-4-gizmo") {
|
if strings.HasPrefix(logModel, "gpt-4-gizmo") {
|
||||||
logModel = "gpt-4-gizmo-*"
|
logModel = "gpt-4-gizmo-*"
|
||||||
|
|||||||
@@ -243,20 +243,18 @@ func PostClaudeConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
|||||||
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
||||||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, preConsumedQuota))
|
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, modelName, preConsumedQuota))
|
||||||
} else {
|
} else {
|
||||||
//if sensitiveResp != nil {
|
|
||||||
// logContent += fmt.Sprintf(",敏感词:%s", strings.Join(sensitiveResp.SensitiveWords, ", "))
|
|
||||||
//}
|
|
||||||
quotaDelta := quota - preConsumedQuota
|
|
||||||
if quotaDelta != 0 {
|
|
||||||
err := PostConsumeQuota(relayInfo, quotaDelta, preConsumedQuota, true)
|
|
||||||
if err != nil {
|
|
||||||
common.LogError(ctx, "error consuming token remain quota: "+err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
||||||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
quotaDelta := quota - preConsumedQuota
|
||||||
|
if quotaDelta != 0 {
|
||||||
|
err := PostConsumeQuota(relayInfo, quotaDelta, preConsumedQuota, true)
|
||||||
|
if err != nil {
|
||||||
|
common.LogError(ctx, "error consuming token remain quota: "+err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio,
|
other := GenerateClaudeOtherInfo(ctx, relayInfo, modelRatio, groupRatio, completionRatio,
|
||||||
cacheTokens, cacheRatio, cacheCreationTokens, cacheCreationRatio, modelPrice)
|
cacheTokens, cacheRatio, cacheCreationTokens, cacheCreationRatio, modelPrice)
|
||||||
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, modelName,
|
model.RecordConsumeLog(ctx, relayInfo.UserId, relayInfo.ChannelId, promptTokens, completionTokens, modelName,
|
||||||
@@ -318,17 +316,18 @@ func PostAudioConsumeQuota(ctx *gin.Context, relayInfo *relaycommon.RelayInfo,
|
|||||||
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
common.LogError(ctx, fmt.Sprintf("total tokens is 0, cannot consume quota, userId %d, channelId %d, "+
|
||||||
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, preConsumedQuota))
|
"tokenId %d, model %s, pre-consumed quota %d", relayInfo.UserId, relayInfo.ChannelId, relayInfo.TokenId, relayInfo.OriginModelName, preConsumedQuota))
|
||||||
} else {
|
} else {
|
||||||
quotaDelta := quota - preConsumedQuota
|
|
||||||
if quotaDelta != 0 {
|
|
||||||
err := PostConsumeQuota(relayInfo, quotaDelta, preConsumedQuota, true)
|
|
||||||
if err != nil {
|
|
||||||
common.LogError(ctx, "error consuming token remain quota: "+err.Error())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
model.UpdateUserUsedQuotaAndRequestCount(relayInfo.UserId, quota)
|
||||||
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
model.UpdateChannelUsedQuota(relayInfo.ChannelId, quota)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
quotaDelta := quota - preConsumedQuota
|
||||||
|
if quotaDelta != 0 {
|
||||||
|
err := PostConsumeQuota(relayInfo, quotaDelta, preConsumedQuota, true)
|
||||||
|
if err != nil {
|
||||||
|
common.LogError(ctx, "error consuming token remain quota: "+err.Error())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
logModel := relayInfo.OriginModelName
|
logModel := relayInfo.OriginModelName
|
||||||
if extraContent != "" {
|
if extraContent != "" {
|
||||||
logContent += ", " + extraContent
|
logContent += ", " + extraContent
|
||||||
|
|||||||
Reference in New Issue
Block a user