feat: enhance ThinkingAdaptor with effort-based budget clamping and extra body handling
This commit is contained in:
@@ -49,12 +49,20 @@ const (
|
||||
flash25LiteMaxBudget = 24576
|
||||
)
|
||||
|
||||
// clampThinkingBudget 根据模型名称将预算限制在允许的范围内
|
||||
func clampThinkingBudget(modelName string, budget int) int {
|
||||
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
||||
func isNew25ProModel(modelName string) bool {
|
||||
return strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
||||
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
|
||||
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25")
|
||||
is25FlashLite := strings.HasPrefix(modelName, "gemini-2.5-flash-lite")
|
||||
}
|
||||
|
||||
func is25FlashLiteModel(modelName string) bool {
|
||||
return strings.HasPrefix(modelName, "gemini-2.5-flash-lite")
|
||||
}
|
||||
|
||||
// clampThinkingBudget 根据模型名称将预算限制在允许的范围内
|
||||
func clampThinkingBudget(modelName string, budget int) int {
|
||||
isNew25Pro := isNew25ProModel(modelName)
|
||||
is25FlashLite := is25FlashLiteModel(modelName)
|
||||
|
||||
if is25FlashLite {
|
||||
if budget < flash25LiteMinBudget {
|
||||
@@ -81,7 +89,34 @@ func clampThinkingBudget(modelName string, budget int) int {
|
||||
return budget
|
||||
}
|
||||
|
||||
func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo) {
|
||||
// "effort": "high" - Allocates a large portion of tokens for reasoning (approximately 80% of max_tokens)
|
||||
// "effort": "medium" - Allocates a moderate portion of tokens (approximately 50% of max_tokens)
|
||||
// "effort": "low" - Allocates a smaller portion of tokens (approximately 20% of max_tokens)
|
||||
func clampThinkingBudgetByEffort(modelName string, effort string) int {
|
||||
isNew25Pro := isNew25ProModel(modelName)
|
||||
is25FlashLite := is25FlashLiteModel(modelName)
|
||||
|
||||
maxBudget := 0
|
||||
if is25FlashLite {
|
||||
maxBudget = flash25LiteMaxBudget
|
||||
}
|
||||
if isNew25Pro {
|
||||
maxBudget = pro25MaxBudget
|
||||
} else {
|
||||
maxBudget = flash25MaxBudget
|
||||
}
|
||||
switch effort {
|
||||
case "high":
|
||||
return maxBudget * 80 / 100
|
||||
case "medium":
|
||||
return maxBudget * 50 / 100
|
||||
case "low":
|
||||
return maxBudget * 20 / 100
|
||||
}
|
||||
return maxBudget * 50 / 100 // 默认medium
|
||||
}
|
||||
|
||||
func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
|
||||
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
||||
modelName := info.UpstreamModelName
|
||||
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
||||
@@ -124,6 +159,11 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
|
||||
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
||||
clampedBudget := clampThinkingBudget(modelName, int(budgetTokens))
|
||||
geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampedBudget)
|
||||
} else {
|
||||
if len(oaiRequest) > 0 {
|
||||
// 如果有reasoningEffort参数,则根据其值设置思考预算
|
||||
geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampThinkingBudgetByEffort(modelName, oaiRequest[0].ReasoningEffort))
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if strings.HasSuffix(modelName, "-nothinking") {
|
||||
@@ -156,7 +196,37 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
||||
}
|
||||
}
|
||||
|
||||
ThinkingAdaptor(&geminiRequest, info)
|
||||
adaptorWithExtraBody := false
|
||||
|
||||
if len(textRequest.ExtraBody) > 0 {
|
||||
if !strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
|
||||
var extraBody map[string]interface{}
|
||||
if err := common.Unmarshal(textRequest.ExtraBody, &extraBody); err != nil {
|
||||
return nil, fmt.Errorf("invalid extra body: %w", err)
|
||||
}
|
||||
// eg. {"google":{"thinking_config":{"thinking_budget":5324,"include_thoughts":true}}}
|
||||
if googleBody, ok := extraBody["google"].(map[string]interface{}); ok {
|
||||
adaptorWithExtraBody = true
|
||||
if thinkingConfig, ok := googleBody["thinking_config"].(map[string]interface{}); ok {
|
||||
if budget, ok := thinkingConfig["thinking_budget"].(float64); ok {
|
||||
budgetInt := int(budget)
|
||||
geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
|
||||
ThinkingBudget: common.GetPointer(budgetInt),
|
||||
IncludeThoughts: true,
|
||||
}
|
||||
} else {
|
||||
geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
|
||||
IncludeThoughts: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !adaptorWithExtraBody {
|
||||
ThinkingAdaptor(&geminiRequest, info, textRequest)
|
||||
}
|
||||
|
||||
safetySettings := make([]dto.GeminiChatSafetySettings, 0, len(SafetySettingList))
|
||||
for _, category := range SafetySettingList {
|
||||
|
||||
Reference in New Issue
Block a user