feat: enhance ThinkingAdaptor with effort-based budget clamping and extra body handling
This commit is contained in:
@@ -49,12 +49,20 @@ const (
|
|||||||
flash25LiteMaxBudget = 24576
|
flash25LiteMaxBudget = 24576
|
||||||
)
|
)
|
||||||
|
|
||||||
// clampThinkingBudget 根据模型名称将预算限制在允许的范围内
|
// isNew25ProModel reports whether modelName starts with "gemini-2.5-pro"
// while not starting with either of the older preview prefixes
// "gemini-2.5-pro-preview-05-06" or "gemini-2.5-pro-preview-03-25".
func isNew25ProModel(modelName string) bool {
|
||||||
func clampThinkingBudget(modelName string, budget int) int {
|
return strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
||||||
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
|
||||||
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
|
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-05-06") &&
|
||||||
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25")
|
!strings.HasPrefix(modelName, "gemini-2.5-pro-preview-03-25")
|
||||||
is25FlashLite := strings.HasPrefix(modelName, "gemini-2.5-flash-lite")
|
}
|
||||||
|
|
||||||
|
// is25FlashLiteModel reports whether modelName names a model in the
// gemini-2.5-flash-lite family, judged purely by its prefix.
func is25FlashLiteModel(modelName string) bool {
	const flashLitePrefix = "gemini-2.5-flash-lite"
	matched := strings.HasPrefix(modelName, flashLitePrefix)
	return matched
}
|
||||||
|
|
||||||
|
// clampThinkingBudget 根据模型名称将预算限制在允许的范围内
|
||||||
|
func clampThinkingBudget(modelName string, budget int) int {
|
||||||
|
isNew25Pro := isNew25ProModel(modelName)
|
||||||
|
is25FlashLite := is25FlashLiteModel(modelName)
|
||||||
|
|
||||||
if is25FlashLite {
|
if is25FlashLite {
|
||||||
if budget < flash25LiteMinBudget {
|
if budget < flash25LiteMinBudget {
|
||||||
@@ -81,7 +89,34 @@ func clampThinkingBudget(modelName string, budget int) int {
|
|||||||
return budget
|
return budget
|
||||||
}
|
}
|
||||||
|
|
||||||
func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo) {
|
// "effort": "high" - Allocates a large portion of the model's maximum thinking budget for reasoning (80%)
|
||||||
|
// "effort": "medium" - Allocates a moderate portion of the model's maximum thinking budget (50%)
|
||||||
|
// "effort": "low" - Allocates a smaller portion of the model's maximum thinking budget (20%)
|
||||||
|
func clampThinkingBudgetByEffort(modelName string, effort string) int {
|
||||||
|
isNew25Pro := isNew25ProModel(modelName)
|
||||||
|
is25FlashLite := is25FlashLiteModel(modelName)
|
||||||
|
|
||||||
|
maxBudget := 0
|
||||||
|
if is25FlashLite {
|
||||||
|
maxBudget = flash25LiteMaxBudget
|
||||||
|
}
|
||||||
|
if isNew25Pro {
|
||||||
|
maxBudget = pro25MaxBudget
|
||||||
|
} else {
|
||||||
|
maxBudget = flash25MaxBudget
|
||||||
|
}
|
||||||
|
switch effort {
|
||||||
|
case "high":
|
||||||
|
return maxBudget * 80 / 100
|
||||||
|
case "medium":
|
||||||
|
return maxBudget * 50 / 100
|
||||||
|
case "low":
|
||||||
|
return maxBudget * 20 / 100
|
||||||
|
}
|
||||||
|
return maxBudget * 50 / 100 // 默认medium
|
||||||
|
}
|
||||||
|
|
||||||
|
func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.RelayInfo, oaiRequest ...dto.GeneralOpenAIRequest) {
|
||||||
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
if model_setting.GetGeminiSettings().ThinkingAdapterEnabled {
|
||||||
modelName := info.UpstreamModelName
|
modelName := info.UpstreamModelName
|
||||||
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
isNew25Pro := strings.HasPrefix(modelName, "gemini-2.5-pro") &&
|
||||||
@@ -124,6 +159,11 @@ func ThinkingAdaptor(geminiRequest *dto.GeminiChatRequest, info *relaycommon.Rel
|
|||||||
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
budgetTokens := model_setting.GetGeminiSettings().ThinkingAdapterBudgetTokensPercentage * float64(geminiRequest.GenerationConfig.MaxOutputTokens)
|
||||||
clampedBudget := clampThinkingBudget(modelName, int(budgetTokens))
|
clampedBudget := clampThinkingBudget(modelName, int(budgetTokens))
|
||||||
geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampedBudget)
|
geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampedBudget)
|
||||||
|
} else {
|
||||||
|
if len(oaiRequest) > 0 {
|
||||||
|
// 如果有reasoningEffort参数,则根据其值设置思考预算
|
||||||
|
geminiRequest.GenerationConfig.ThinkingConfig.ThinkingBudget = common.GetPointer(clampThinkingBudgetByEffort(modelName, oaiRequest[0].ReasoningEffort))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if strings.HasSuffix(modelName, "-nothinking") {
|
} else if strings.HasSuffix(modelName, "-nothinking") {
|
||||||
@@ -156,7 +196,37 @@ func CovertGemini2OpenAI(textRequest dto.GeneralOpenAIRequest, info *relaycommon
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ThinkingAdaptor(&geminiRequest, info)
|
adaptorWithExtraBody := false
|
||||||
|
|
||||||
|
if len(textRequest.ExtraBody) > 0 {
|
||||||
|
if !strings.HasSuffix(info.UpstreamModelName, "-nothinking") {
|
||||||
|
var extraBody map[string]interface{}
|
||||||
|
if err := common.Unmarshal(textRequest.ExtraBody, &extraBody); err != nil {
|
||||||
|
return nil, fmt.Errorf("invalid extra body: %w", err)
|
||||||
|
}
|
||||||
|
// eg. {"google":{"thinking_config":{"thinking_budget":5324,"include_thoughts":true}}}
|
||||||
|
if googleBody, ok := extraBody["google"].(map[string]interface{}); ok {
|
||||||
|
adaptorWithExtraBody = true
|
||||||
|
if thinkingConfig, ok := googleBody["thinking_config"].(map[string]interface{}); ok {
|
||||||
|
if budget, ok := thinkingConfig["thinking_budget"].(float64); ok {
|
||||||
|
budgetInt := int(budget)
|
||||||
|
geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
|
||||||
|
ThinkingBudget: common.GetPointer(budgetInt),
|
||||||
|
IncludeThoughts: true,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
geminiRequest.GenerationConfig.ThinkingConfig = &dto.GeminiThinkingConfig{
|
||||||
|
IncludeThoughts: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !adaptorWithExtraBody {
|
||||||
|
ThinkingAdaptor(&geminiRequest, info, textRequest)
|
||||||
|
}
|
||||||
|
|
||||||
safetySettings := make([]dto.GeminiChatSafetySettings, 0, len(SafetySettingList))
|
safetySettings := make([]dto.GeminiChatSafetySettings, 0, len(SafetySettingList))
|
||||||
for _, category := range SafetySettingList {
|
for _, category := range SafetySettingList {
|
||||||
|
|||||||
Reference in New Issue
Block a user