fix(gemini): handle minimal reasoning effort budget

- Add minimal case to clampThinkingBudgetByEffort to avoid defaulting to full thinking budget
This commit is contained in:
Seefs
2025-12-18 08:10:46 +08:00
parent 2f28f265a9
commit 39df47486c
3 changed files with 7 additions and 11 deletions

View File

@@ -13,6 +13,7 @@ import (
relaycommon "github.com/QuantumNous/new-api/relay/common" relaycommon "github.com/QuantumNous/new-api/relay/common"
"github.com/QuantumNous/new-api/relay/constant" "github.com/QuantumNous/new-api/relay/constant"
"github.com/QuantumNous/new-api/setting/model_setting" "github.com/QuantumNous/new-api/setting/model_setting"
"github.com/QuantumNous/new-api/setting/reasoning"
"github.com/QuantumNous/new-api/types" "github.com/QuantumNous/new-api/types"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
@@ -137,7 +138,7 @@ func (a *Adaptor) GetRequestURL(info *relaycommon.RelayInfo) (string, error) {
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking") info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-thinking")
} else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") { } else if strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking") info.UpstreamModelName = strings.TrimSuffix(info.UpstreamModelName, "-nothinking")
} else if baseModel, level := parseThinkingLevelSuffix(info.UpstreamModelName); level != "" { } else if baseModel, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" {
info.UpstreamModelName = baseModel info.UpstreamModelName = baseModel
} }
} }

View File

@@ -98,6 +98,7 @@ func clampThinkingBudget(modelName string, budget int) int {
// "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens) // "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens)
// "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens) // "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens)
// "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens) // "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens)
// "effort": "minimal" - Allocates a minimal portion of tokens (approximately 5% of max_tokens)
func clampThinkingBudgetByEffort(modelName string, effort string) int { func clampThinkingBudgetByEffort(modelName string, effort string) int {
isNew25Pro := isNew25ProModel(modelName) isNew25Pro := isNew25ProModel(modelName)
is25FlashLite := is25FlashLiteModel(modelName) is25FlashLite := is25FlashLiteModel(modelName)
@@ -118,18 +119,12 @@ func clampThinkingBudgetByEffort(modelName string, effort string) int {
maxBudget = maxBudget * 50 / 100 maxBudget = maxBudget * 50 / 100
case "low": case "low":
maxBudget = maxBudget * 20 / 100 maxBudget = maxBudget * 20 / 100
case "minimal":
maxBudget = maxBudget * 5 / 100
} }
return clampThinkingBudget(modelName, maxBudget) return clampThinkingBudget(modelName, maxBudget)
} }
func parseThinkingLevelSuffix(modelName string) (string, string) {
base, level, ok := reasoning.TrimEffortSuffix(modelName)
if !ok {
return modelName, ""
}
return base, level
}
func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) { func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
modelName := info.UpstreamModelName modelName := info.UpstreamModelName
@@ -186,7 +181,7 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
ThinkingBudget: common.GetPointer(0), ThinkingBudget: common.GetPointer(0),
} }
} }
} else if _, level := parseThinkingLevelSuffix(modelName); level != "" { } else if _, level, ok := reasoning.TrimEffortSuffix(info.UpstreamModelName); ok && level != "" {
geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{ geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
IncludeThoughts: true, IncludeThoughts: true,
ThinkingLevel: level, ThinkingLevel: level,

View File

@@ -6,7 +6,7 @@ import (
"github.com/samber/lo" "github.com/samber/lo"
) )
var EffortSuffixes = []string{"-high", "-medium", "-low"} var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"}
// TrimEffortSuffix -> modelName level(low) exists // TrimEffortSuffix -> modelName level(low) exists
func TrimEffortSuffix(modelName string) (string, string, bool) { func TrimEffortSuffix(modelName string) (string, string, bool) {