From 21f43b0dd8b033c51157c6215961df511a0aace7 Mon Sep 17 00:00:00 2001 From: RedwindA Date: Wed, 18 Jun 2025 00:49:35 +0800 Subject: [PATCH] feat(Gemini): enhance budget clamping logic for Gemini models --- relay/channel/gemini/relay-gemini.go | 115 ++++++++++++--------------- 1 file changed, 53 insertions(+), 62 deletions(-) diff --git a/relay/channel/gemini/relay-gemini.go b/relay/channel/gemini/relay-gemini.go index 7eb8e8f7..635041d7 100644 --- a/relay/channel/gemini/relay-gemini.go +++ b/relay/channel/gemini/relay-gemini.go @@ -39,11 +39,45 @@ var geminiSupportedMimeTypes = map[string]bool{ // Gemini 允许的思考预算范围 const ( - pro25MinBudget = 128 - pro25MaxBudget = 32768 - flash25MaxBudget = 24576 + pro25MinBudget = 128 + pro25MaxBudget = 32768 + flash25MaxBudget = 24576 + flash25LiteMinBudget = 512 + flash25LiteMaxBudget = 24576 ) +// clampThinkingBudget 根据模型名称将预算限制在允许的范围内 +func clampThinkingBudget(modelName string, budget int) int { + isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") && + !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") && + !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25") + is25FlashLite := strings.HasPrefix(modelName, "gemini-2.5-flash-lite") + + if is25FlashLite { + if budget < flash25LiteMinBudget { + return flash25LiteMinBudget + } + if budget > flash25LiteMaxBudget { + return flash25LiteMaxBudget + } + } else if isNew25Pro { + if budget < pro25MinBudget { + return pro25MinBudget + } + if budget > pro25MaxBudget { + return pro25MaxBudget + } + } else { // 其他模型 + if budget < 0 { + return 0 + } + if budget > flash25MaxBudget { + return flash25MaxBudget + } + } + return budget +} + // Setting safety to the lowest possible values since Gemini is already powerless enough func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon.RelayInfo) (*GeminiChatRequest, error) { @@ -65,49 +99,31 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon } if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { - // 新增逻辑:处理 -thinking- 格式 - if strings.Contains(info.OriginModelName, "-thinking-") { - parts := strings.SplitN(info.OriginModelName, "-thinking-", 2) + modelName := info.OriginModelName + isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") && + !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") && + !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25") + is25FlashLite := strings.HasPrefix(modelName, "gemini-2.5-flash-lite") + + if strings.Contains(modelName, "-thinking-") { + parts := strings.SplitN(modelName, "-thinking-", 2) if len(parts) == 2 && parts[1] != "" { if budgetTokens, err := strconv.Atoi(parts[1]); err == nil { - // 从模型名称成功解析预算 - isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") && - !strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") && - !strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25") - - if isNew25Pro { - // 新的2.5pro模型:ThinkingBudget范围为128-32768 - if budgetTokens < pro25MinBudget { - budgetTokens = pro25MinBudget - } else if budgetTokens > pro25MaxBudget { - budgetTokens = pro25MaxBudget - } - } else { - // 其他模型:ThinkingBudget范围为0-24576 - if budgetTokens < 0 { - budgetTokens = 0 - } else if budgetTokens > flash25MaxBudget { - budgetTokens = flash25MaxBudget - } - } - + clampedBudget := clampThinkingBudget(modelName, budgetTokens) geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{ - ThinkingBudget: common.GetPointer(budgetTokens), + ThinkingBudget: common.GetPointer(clampedBudget), IncludeThoughts: true, } } - // 如果解析失败,则不设置ThinkingConfig,静默处理 } - } else if strings.HasSuffix(info.OriginModelName, "-thinking") { // 保留旧逻辑以兼容 - // 硬编码不支持 ThinkingBudget 的旧模型 + } else if strings.HasSuffix(modelName, "-thinking") { unsupportedModels := []string{ "gemini-2.5-pro-preview-05-06", "gemini-2.5-pro-preview-03-25", } - isUnsupported := false for _, unsupportedModel := range unsupportedModels { - if strings.HasPrefix(info.OriginModelName, unsupportedModel) { + if strings.HasPrefix(modelName, unsupportedModel) { isUnsupported = true break } @@ -119,39 +135,14 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon } } else { budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens) - - // 检查是否为新的2.5pro模型(支持ThinkingBudget但有特殊范围) - isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") && - !strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") && - !strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25") - - if isNew25Pro { - // 新的2.5pro模型:ThinkingBudget范围为128-32768 - if budgetTokens == 0 || budgetTokens < 128 { - budgetTokens = 128 - } else if budgetTokens > 32768 { - budgetTokens = 32768 - } - } else { - // 其他模型:ThinkingBudget范围为0-24576 - if budgetTokens == 0 || budgetTokens > 24576 { - budgetTokens = 24576 - } - } - + clampedBudget := clampThinkingBudget(modelName, int(budgetTokens)) geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{ - ThinkingBudget: common.GetPointer(int(budgetTokens)), + ThinkingBudget: common.GetPointer(clampedBudget), IncludeThoughts: true, } } - } else if strings.HasSuffix(info.OriginModelName, "-nothinking") { - // 检查是否为新的2.5pro模型(不支持-nothinking,因为最低值只能为128) - isNew25Pro := strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro") && - !strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-05-06") && - !strings.HasPrefix(info.OriginModelName, "gemini-2.5-pro-preview-03-25") - - if !isNew25Pro { - // 只有非新2.5pro模型才支持-nothinking + } else if strings.HasSuffix(modelName, "-nothinking") { + if !isNew25Pro && !is25FlashLite { geminiRequest.GenerationConfig.ThinkingConfig = &GeminiThinkingConfig{ ThinkingBudget: common.GetPointer(0), }