feat: support adaptive thinking

2026-02-06 11:01:23 +08:00
parent 27825ec377
commit e194b747c3
8 changed files with 44 additions and 4 deletions
--- a/relay/channel/aws/dto.go
+++ b/relay/channel/aws/dto.go
@@ -26,6 +26,7 @@ type AwsClaudeRequest struct {
 	Tools            any                 `json:"tools,omitempty"`
 	ToolChoice       any                 `json:"tool_choice,omitempty"`
 	Thinking         *dto.Thinking       `json:"thinking,omitempty"`
 	OutputConfig     json.RawMessage     `json:"output_config,omitempty"`
 }
 func formatRequest(requestBody io.Reader, requestHeader http.Header) (*AwsClaudeRequest, error) {
--- a/relay/channel/claude/constants.go
+++ b/relay/channel/claude/constants.go
@@ -21,7 +21,10 @@ var ModelList = []string{
 	"claude-opus-4-5-20251101",
 	"claude-opus-4-5-20251101-thinking",
 	"claude-opus-4-6",
-	"claude-opus-4-6-thinking",
+	"claude-opus-4-6-max",
 	"claude-opus-4-6-high",
 	"claude-opus-4-6-medium",
 	"claude-opus-4-6-low",
 }
 var ChannelName = "claude"
--- a/relay/channel/claude/relay-claude.go
+++ b/relay/channel/claude/relay-claude.go
@@ -17,6 +17,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/reasonmap"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"
 	"github.com/gin-gonic/gin"
@@ -141,7 +142,16 @@ func RequestOpenAI2ClaudeMessage(c *gin.Context, textRequest dto.GeneralOpenAIRe
 		claudeRequest.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(textRequest.Model))
 	}
-	if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
+	if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(textRequest.Model); ok && effortLevel != "" &&
 		strings.HasPrefix(textRequest.Model, "claude-opus-4-6") {
 		claudeRequest.Model = baseModel
 		claudeRequest.Thinking = &dto.Thinking{
 			Type: "adaptive",
 		}
 		claudeRequest.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
 		claudeRequest.TopP = 0
 		claudeRequest.Temperature = common.GetPointer[float64](1.0)
 	} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
 		strings.HasSuffix(textRequest.Model, "-thinking") {
 		// BudgetTokens must be greater than 1024
--- a/relay/channel/vertex/dto.go
+++ b/relay/channel/vertex/dto.go
@@ -1,6 +1,8 @@
 package vertex
 import (
 	"encoding/json"
 	"github.com/QuantumNous/new-api/dto"
 )
@@ -17,6 +19,7 @@ type VertexAIClaudeRequest struct {
 	Tools            any                 `json:"tools,omitempty"`
 	ToolChoice       any                 `json:"tool_choice,omitempty"`
 	Thinking         *dto.Thinking       `json:"thinking,omitempty"`
 	OutputConfig     json.RawMessage     `json:"output_config,omitempty"`
 }
 func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest {
@@ -33,5 +36,6 @@ func copyRequest(req *dto.ClaudeRequest, version string) *VertexAIClaudeRequest
 		Tools:            req.Tools,
 		ToolChoice:       req.ToolChoice,
 		Thinking:         req.Thinking,
 		OutputConfig:     req.OutputConfig,
 	}
 }
--- a/relay/claude_handler.go
+++ b/relay/claude_handler.go
@@ -2,6 +2,7 @@ package relay
 import (
 	"bytes"
 	"encoding/json"
 	"fmt"
 	"io"
 	"net/http"
@@ -14,6 +15,7 @@ import (
 	"github.com/QuantumNous/new-api/relay/helper"
 	"github.com/QuantumNous/new-api/service"
 	"github.com/QuantumNous/new-api/setting/model_setting"
 	"github.com/QuantumNous/new-api/setting/reasoning"
 	"github.com/QuantumNous/new-api/types"
 	"github.com/gin-gonic/gin"
@@ -49,7 +51,15 @@ func ClaudeHelper(c *gin.Context, info *relaycommon.RelayInfo) (newAPIError *typ
 		request.MaxTokens = uint(model_setting.GetClaudeSettings().GetDefaultMaxTokens(request.Model))
 	}
-	if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
+	if baseModel, effortLevel, ok := reasoning.TrimEffortSuffix(request.Model); ok && effortLevel != "" &&
 		strings.HasPrefix(request.Model, "claude-opus-4-6") {
 		request.Model = baseModel
 		request.Thinking = &dto.Thinking{
 			Type: "adaptive",
 		}
 		request.OutputConfig = json.RawMessage(fmt.Sprintf(`{"effort":"%s"}`, effortLevel))
 		info.UpstreamModelName = request.Model
 	} else if model_setting.GetClaudeSettings().ThinkingAdapterEnabled &&
 		strings.HasSuffix(request.Model, "-thinking") {
 		if request.Thinking == nil {
 			// BudgetTokens must be greater than 1024
--- a/setting/ratio_setting/cache_ratio.go
+++ b/setting/ratio_setting/cache_ratio.go
@@ -62,6 +62,10 @@ var defaultCacheRatio = map[string]float64{
 	"claude-opus-4-5-20251101-thinking":   0.1,
 	"claude-opus-4-6":                     0.1,
 	"claude-opus-4-6-thinking":            0.1,
 	"claude-opus-4-6-max":                 0.1,
 	"claude-opus-4-6-high":                0.1,
 	"claude-opus-4-6-medium":              0.1,
 	"claude-opus-4-6-low":                 0.1,
 }
 var defaultCreateCacheRatio = map[string]float64{
@@ -86,6 +90,10 @@ var defaultCreateCacheRatio = map[string]float64{
 	"claude-opus-4-5-20251101-thinking":   1.25,
 	"claude-opus-4-6":                     1.25,
 	"claude-opus-4-6-thinking":            1.25,
 	"claude-opus-4-6-max":                 1.25,
 	"claude-opus-4-6-high":                1.25,
 	"claude-opus-4-6-medium":              1.25,
 	"claude-opus-4-6-low":                 1.25,
 }
 //var defaultCreateCacheRatio = map[string]float64{}
--- a/setting/ratio_setting/model_ratio.go
+++ b/setting/ratio_setting/model_ratio.go
@@ -143,6 +143,10 @@ var defaultModelRatio = map[string]float64{
 	"claude-sonnet-4-5-20250929":                1.5,
 	"claude-opus-4-5-20251101":                  2.5,
 	"claude-opus-4-6":                           2.5,
 	"claude-opus-4-6-max":                       2.5,
 	"claude-opus-4-6-high":                      2.5,
 	"claude-opus-4-6-medium":                    2.5,
 	"claude-opus-4-6-low":                       2.5,
 	"claude-3-opus-20240229":                    7.5, // $15 / 1M tokens
 	"claude-opus-4-20250514":                    7.5,
 	"claude-opus-4-1-20250805":                  7.5,
--- a/setting/reasoning/suffix.go
+++ b/setting/reasoning/suffix.go
@@ -6,7 +6,7 @@ import (
 	"github.com/samber/lo"
 )
-var EffortSuffixes = []string{"-high", "-medium", "-low", "-minimal"}
+var EffortSuffixes = []string{"-max", "-high", "-medium", "-low", "-minimal"}
 // TrimEffortSuffix returns the model name, the effort level (e.g. "low"), and whether an effort suffix exists.
 func TrimEffortSuffix(modelName string) (string, string, bool) {