diff --git a/relay/channel/gemini/relay-gemini.go b/relay/channel/gemini/relay-gemini.go index adc771e2..0f4a54cf 100644 --- a/relay/channel/gemini/relay-gemini.go +++ b/relay/channel/gemini/relay-gemini.go @@ -49,12 +49,20 @@ const ( flash25LiteMaxBudget = 24576 ) -// clampThinkingBudget 根据模型名称将预算限制在允许的范围内 -func clampThinkingBudget(modelName string, budget int) int { - isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") && +func isNew25ProModel(modelName string) bool { + return strings.HasPrefix(modelName, "gemini-2.5-pro") && !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") && !strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25") - is25FlashLite := strings.HasPrefix(modelName, "gemini-2.5-flash-lite") +} + +func is25FlashLiteModel(modelName string) bool { + return strings.HasPrefix(modelName, "gemini-2.5-flash-lite") +} + +// clampThinkingBudget 根据模型名称将预算限制在允许的范围内 +func clampThinkingBudget(modelName string, budget int) int { + isNew25Pro := isNew25ProModel(modelName) + is25FlashLite := is25FlashLiteModel(modelName) if is25FlashLite { if budget < flash25LiteMinBudget { @@ -81,7 +89,34 @@ func clampThinkingBudget(modelName string, budget int) int { return budget } -func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo) { +// "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens) +// "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens) +// "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens) +func clampThinkingBudgetByEffort(modelName string, effort string) int { + isNew25Pro := isNew25ProModel(modelName) + is25FlashLite := is25FlashLiteModel(modelName) + + maxBudget := 0 + if is25FlashLite { + maxBudget = flash25LiteMaxBudget + } + if isNew25Pro { + maxBudget = pro25MaxBudget + } else { + maxBudget = flash25MaxBudget + } + switch effort { + case "high": + return maxBudget * 80 / 100 + case "medium": + return maxBudget * 50 / 100 + case "low": + return maxBudget * 20 / 100 + } + return maxBudget * 50 / 100 // 默认medium +} + +func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) { if model_setting.GetGeminiSettings().ThinkingAdapterEnabled { modelName := info.UpstreamModelName isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") && @@ -124,6 +159,11 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens) clampedBudget := clampThinkingBudget(modelName, int(budgetTokens)) geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampedBudget) + } else { + if len(oaiRequest) > 0 { + // 如果有reasoningEffort参数,则根据其值设置思考预算 + geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampThinkingBudgetByEffort(modelName, oaiRequest[0].ReasoningEffort)) + } } } } else if strings.HasSuffix(modelName, "-nothinking") { @@ -156,7 +196,37 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon } } - ThinkingAdaptor(&geminiRequest, info) + adaptorWithExtraBody := false + + if len(textRequest.ExtraBody) > 0 { + if !strings.HasSuffix(info.UpstreamModelName, "-nothinking") { + var extraBody map[string]interface{} + if err := common.Unmarshal(textRequest.ExtraBody, &extraBody); err != nil { + return nil, fmt.Errorf("invalid extra body: %w", err) + } + // eg. {"google":{"thinking_config":{"thinking_budget":5324,"include_thoughts":true}}} + if googleBody, ok := extraBody["google"].(map[string]interface{}); ok { + adaptorWithExtraBody = true + if thinkingConfig, ok := googleBody["thinking_config"].(map[string]interface{}); ok { + if budget, ok := thinkingConfig["thinking_budget"].(float64); ok { + budgetInt := int(budget) + geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{ + ThinkingBudget: common.GetPointer(budgetInt), + IncludeThoughts: true, + } + } else { + geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{ + IncludeThoughts: true, + } + } + } + } + } + } + + if !adaptorWithExtraBody { + ThinkingAdaptor(&geminiRequest, info, textRequest) + } safetySettings := make([]dto.GeminiChatSafetySettings, 0, len(SafetySettingList)) for _, category := range SafetySettingList {